On 02/06/19(Sun) 16:41, Martin Pieuchot wrote:
> On 01/06/19(Sat) 18:55, Martin Pieuchot wrote:
> > The diff below exists mainly for documentation and test purposes. If
> > you're not interested in how to break the scheduler internals into
> > pieces, don't read further and go straight to testing!
> >
> > - The first change is to stop calling tsleep(9) at PUSER. That makes
> > it clear that all "sleeping priorities" are smaller than PUSER.
> > That's important for understanding the diff below. `p_priority'
> > currently serves as a placeholder for both the "sleeping priority"
> > and the "runqueue priority". This diff separates the two fields.
> >
> > - When a thread goes to sleep, the priority argument of tsleep(9) is
> > now recorded in `p_slpprio'. This argument can be considered part
> > of the sleep queue. Its purpose is to place the thread into a higher
> > runqueue when awoken.
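> >
> > Condensed from the diff below (the wakeup_n() and endtsleep() hunks),
> > the wakeup path now looks like this:
> >
> > 	/* Requeue an awoken thread at its recorded sleeping priority. */
> > 	if (p->p_stat == SSLEEP)
> > 		setrunnable(p, p->p_slpprio);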
> >
> > - Currently, for stopped threads, `p_priority' corresponds to `p_usrpri'.
> > So setrunnable() has been untangled to place SSTOP and SSLEEP threads
> > in the preferred queue without having to use `p_priority'. Note that
> > `p_usrpri' is still recalculated *after* setrunqueue() has been
> > called. This is currently fine because setrunnable() runs with the
> > SCHED_LOCK() held, but it will become racy once we split it.
> >
> > - A new field, `p_runprio', has been introduced. It should be considered
> > part of the per-CPU runqueues. It records the priority at which a
> > thread is currently queued.
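> >
> > The new setrunqueue() prototype makes this explicit; passing a NULL
> > CPU lets it choose one. A sketch of the two call styles used in the
> > diff:
> >
> > 	setrunqueue(p->p_cpu, p, p->p_usrpri);	/* stay on the current CPU */
> > 	setrunqueue(NULL, p, prio);		/* pick a CPU via sched_choosecpu() */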
> >
> > - `spc_curpriority' is now updated at every context switch. That means
> > need_resched() is no longer called based on a comparison with an
> > out-of-date value. At the same time, `p_usrpri' is initialized to
> > the highest possible value for idle threads.
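> >
> > That is, mi_switch() now publishes the priority itself (condensed
> > from the diff):
> >
> > 	spc = &curcpu()->ci_schedstate;
> > 	spc->spc_curpriority = p->p_usrpri;
> > 	nanouptime(&spc->spc_runtime);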
> >
> > - resched_proc() was calling need_resched() in the following cases:
> > - If the SONPROC thread has a higher priority than the current
> > running thread (itself).
> > - Twice in setrunnable(), where we know that p_priority <= p_usrpri.
> > - If schedcpu() considered that a thread, after updating its prio,
> > should preempt the one running on the CPU pointed to by `p_cpu'.
> >
> > The diff below simplifies all of that by calling need_resched() when:
> > - A thread is inserted in a CPU runqueue at a higher priority than
> > the one SONPROC.
> > - schedcpu() decides that a thread in SRUN state should preempt the
> > one SONPROC.
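> >
> > The first case boils down to a single check at the tail of
> > setrunqueue() (condensed from the diff):
> >
> > 	if (prio < spc->spc_curpriority)
> > 		need_resched(ci);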
> >
> > - `p_estcpu', `p_usrpri' and `p_slptime', which represent the "priority"
> > of a thread, are now updated while holding a per-thread mutex. As a
> > result schedclock() and donice() no longer take the SCHED_LOCK(),
> > and schedcpu() almost never takes it.
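> >
> > The pattern, as used by donice() in the diff (resetpriority()
> > recomputes `p_estcpu' and `p_usrpri' under the mutex):
> >
> > 	mtx_enter(&p->p_mtx);
> > 	resetpriority(p, p->p_estcpu, n);
> > 	mtx_leave(&p->p_mtx);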
> >
> > - With this diff top(1) and ps(1) will report the "real" `p_usrpri' value
> > when displaying priorities. This is helpful to understand what's
> > happening:
> >
> > load averages: 0.99, 0.56, 0.25 two.lab.grenadille.net 23:42:10
> > 70 threads: 68 idle, 2 on processor up 0:09
> > CPU0: 0.0% user, 0.0% nice, 51.0% sys, 2.0% spin, 0.0% intr, 47.1% idle
> > CPU1: 2.0% user, 0.0% nice, 51.0% sys, 3.9% spin, 0.0% intr, 43.1% idle
> > Memory: Real: 47M/1005M act/tot Free: 2937M Cache: 812M Swap: 0K/4323M
> >
> > PID TID PRI NICE SIZE RES STATE WAIT TIME CPU COMMAND
> > 81000 145101 72 0 0K 1664K sleep/1 bored 1:15 36.96% softnet
> > 47133 244097 73 0 2984K 4408K sleep/1 netio 1:06 35.06% cvs
> > 64749 522184 66 0 176K 148K onproc/1 - 0:55 28.81% nfsd
> > 21615 602473 127 0 0K 1664K sleep/0 - 7:22 0.00% idle0
> > 12413 606242 127 0 0K 1664K sleep/1 - 7:08 0.00% idle1
> > 85778 338258 50 0 4936K 7308K idle select 0:10 0.00% ssh
> > 22771 575513 50 0 176K 148K sleep/0 nfsd 0:02 0.00% nfsd
> > ....
> >
> >
> > - The removal of `p_priority' and the change that makes mi_switch()
> > always update `spc_curpriority' might introduce some changes in
> > behavior, especially for kernel threads that do not go through
> > tsleep(9). We currently have some situations where the priority of
> > the running thread isn't correctly reflected. This diff changes
> > that, which means we should be able to better understand where the
> > problems are.
> >
> > I'd be interested in comments/tests/reviews before continuing in this
> > direction. Note that at least parts of this diff are required to split
> > the accounting apart from the SCHED_LOCK() as well.
> >
> > I'll also work on exporting scheduler statistics unless somebody wants
> > to beat me to it :)
>
> Updated diff to use IPL_SCHED and rebased to apply on top of -current :)
Updated diff that fixes a pagefault reported by sthen@.
Index: arch/amd64/amd64/genassym.cf
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/genassym.cf,v
retrieving revision 1.40
diff -u -p -r1.40 genassym.cf
--- arch/amd64/amd64/genassym.cf 17 May 2019 19:07:15 -0000 1.40
+++ arch/amd64/amd64/genassym.cf 1 Jun 2019 16:27:46 -0000
@@ -32,7 +32,6 @@ export VM_MIN_KERNEL_ADDRESS
struct proc
member p_addr
-member p_priority
member p_stat
member p_wchan
member P_MD_REGS p_md.md_regs
Index: arch/hppa/hppa/genassym.cf
===================================================================
RCS file: /cvs/src/sys/arch/hppa/hppa/genassym.cf,v
retrieving revision 1.47
diff -u -p -r1.47 genassym.cf
--- arch/hppa/hppa/genassym.cf 9 Feb 2015 08:20:13 -0000 1.47
+++ arch/hppa/hppa/genassym.cf 1 Jun 2019 17:21:44 -0000
@@ -130,7 +130,6 @@ member tf_cr30
# proc fields and values
struct proc
member p_addr
-member p_priority
member p_stat
member p_wchan
member p_md
Index: arch/i386/i386/esm.c
===================================================================
RCS file: /cvs/src/sys/arch/i386/i386/esm.c,v
retrieving revision 1.59
diff -u -p -r1.59 esm.c
--- arch/i386/i386/esm.c 8 Sep 2015 07:12:56 -0000 1.59
+++ arch/i386/i386/esm.c 1 Jun 2019 16:05:18 -0000
@@ -331,7 +331,7 @@ esm_watchdog(void *arg, int period)
* should have a process context we can sleep in.
*/
while (sc->sc_step != 0) {
- if (tsleep(sc, PUSER | PCATCH, "esm", 0) == EINTR) {
+ if (tsleep(sc, PWAIT | PCATCH, "esm", 0) == EINTR) {
splx(s);
return (sc->sc_wdog_period);
}
Index: arch/i386/i386/genassym.cf
===================================================================
RCS file: /cvs/src/sys/arch/i386/i386/genassym.cf,v
retrieving revision 1.47
diff -u -p -r1.47 genassym.cf
--- arch/i386/i386/genassym.cf 22 Jun 2018 13:21:14 -0000 1.47
+++ arch/i386/i386/genassym.cf 1 Jun 2019 16:27:58 -0000
@@ -72,7 +72,6 @@ export VM_MAXUSER_ADDRESS
# proc fields and values
struct proc
member p_addr
-member p_priority
member p_stat
member p_wchan
member p_vmspace
Index: arch/m88k/m88k/m88k_machdep.c
===================================================================
RCS file: /cvs/src/sys/arch/m88k/m88k/m88k_machdep.c,v
retrieving revision 1.69
diff -u -p -r1.69 m88k_machdep.c
--- arch/m88k/m88k/m88k_machdep.c 22 Oct 2018 17:31:24 -0000 1.69
+++ arch/m88k/m88k/m88k_machdep.c 1 Jun 2019 16:28:43 -0000
@@ -564,9 +564,7 @@ cpu_emergency_disable()
* to mi_switch().
*/
SCHED_LOCK(s);
- p->p_priority = p->p_usrpri;
- p->p_stat = SRUN;
- setrunqueue(p);
+ setrunqueue(p->p_cpu, p, p->p_usrpri);
p->p_ru.ru_nvcsw++;
SCHED_UNLOCK(s);
}
Index: arch/sparc64/sparc64/db_interface.c
===================================================================
RCS file: /cvs/src/sys/arch/sparc64/sparc64/db_interface.c,v
retrieving revision 1.51
diff -u -p -r1.51 db_interface.c
--- arch/sparc64/sparc64/db_interface.c 23 Mar 2019 05:47:23 -0000 1.51
+++ arch/sparc64/sparc64/db_interface.c 1 Jun 2019 17:22:32 -0000
@@ -964,10 +964,10 @@ db_proc_cmd(addr, have_addr, count, modi
return;
}
db_printf("process %p:", p);
- db_printf("pid:%d vmspace:%p pmap:%p ctx:%x wchan:%p pri:%d upri:%d\n",
+ db_printf("pid:%d vmspace:%p pmap:%p ctx:%x wchan:%p spri:%d upri:%d\n",
p->p_p->ps_pid, p->p_vmspace, p->p_vmspace->vm_map.pmap,
p->p_vmspace->vm_map.pmap->pm_ctx,
- p->p_wchan, p->p_priority, p->p_usrpri);
+ p->p_wchan, p->p_slpprio, p->p_usrpri);
db_printf("maxsaddr:%p ssiz:%dpg or %llxB\n",
p->p_vmspace->vm_maxsaddr, p->p_vmspace->vm_ssize,
(unsigned long long)ptoa(p->p_vmspace->vm_ssize));
Index: dev/pci/drm/drm_linux.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/drm/drm_linux.c,v
retrieving revision 1.37
diff -u -p -r1.37 drm_linux.c
--- dev/pci/drm/drm_linux.c 4 Jun 2019 12:08:22 -0000 1.37
+++ dev/pci/drm/drm_linux.c 6 Jun 2019 18:11:10 -0000
@@ -116,7 +116,7 @@ wake_up_process(struct proc *p)
atomic_cas_ptr(&sch_proc, p, NULL);
if (p->p_wchan) {
if (p->p_stat == SSLEEP) {
- setrunnable(p);
+ setrunnable(p, p->p_slpprio);
r = 1;
} else
unsleep(p);
Index: dev/pci/drm/i915/intel_breadcrumbs.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/drm/i915/intel_breadcrumbs.c,v
retrieving revision 1.1
diff -u -p -r1.1 intel_breadcrumbs.c
--- dev/pci/drm/i915/intel_breadcrumbs.c 14 Apr 2019 10:14:52 -0000 1.1
+++ dev/pci/drm/i915/intel_breadcrumbs.c 1 Jun 2019 16:30:43 -0000
@@ -451,7 +451,7 @@ static bool __intel_engine_add_wait(stru
#ifdef __linux__
if (wait->tsk->prio > to_wait(parent)->tsk->prio) {
#else
- if (wait->tsk->p_priority > to_wait(parent)->tsk->p_priority) {
+ if (wait->tsk->p_usrpri > to_wait(parent)->tsk->p_usrpri) {
#endif
p = &parent->rb_right;
first = false;
@@ -538,7 +538,7 @@ static inline bool chain_wakeup(struct r
#else
static inline bool chain_wakeup(struct rb_node *rb, int priority)
{
- return rb && to_wait(rb)->tsk->p_priority <= priority;
+ return rb && to_wait(rb)->tsk->p_usrpri <= priority;
}
#endif
@@ -558,7 +558,7 @@ static inline int wakeup_priority(struct
if (p == b->signaler)
return INT_MIN;
else
- return p->p_priority;
+ return p->p_usrpri;
}
#endif
Index: kern/init_main.c
===================================================================
RCS file: /cvs/src/sys/kern/init_main.c,v
retrieving revision 1.288
diff -u -p -r1.288 init_main.c
--- kern/init_main.c 2 Jun 2019 03:58:28 -0000 1.288
+++ kern/init_main.c 2 Jun 2019 18:46:01 -0000
@@ -200,6 +200,7 @@ main(void *framep)
*/
curproc = p = &proc0;
p->p_cpu = curcpu();
+ mtx_init(&p->p_mtx, IPL_SCHED);
/*
* Initialize timeouts.
Index: kern/kern_exit.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_exit.c,v
retrieving revision 1.176
diff -u -p -r1.176 kern_exit.c
--- kern/kern_exit.c 1 Jun 2019 14:11:17 -0000 1.176
+++ kern/kern_exit.c 1 Jun 2019 16:04:29 -0000
@@ -164,7 +164,7 @@ exit1(struct proc *p, int rv, int flags)
if ((p->p_flag & P_THREAD) == 0) {
/* main thread gotta wait because it has the pid, et al */
while (pr->ps_refcnt > 1)
- tsleep(&pr->ps_threads, PUSER, "thrdeath", 0);
+ tsleep(&pr->ps_threads, PWAIT, "thrdeath", 0);
if (pr->ps_flags & PS_PROFIL)
stopprofclock(pr);
}
Index: kern/kern_fork.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_fork.c,v
retrieving revision 1.212
diff -u -p -r1.212 kern_fork.c
--- kern/kern_fork.c 1 Jun 2019 14:11:17 -0000 1.212
+++ kern/kern_fork.c 2 Jun 2019 18:41:40 -0000
@@ -146,11 +146,13 @@ sys___tfork(struct proc *p, void *v, reg
struct proc *
thread_new(struct proc *parent, vaddr_t uaddr)
{
- struct proc *p;
+ struct proc *p;
p = pool_get(&proc_pool, PR_WAITOK);
p->p_stat = SIDL; /* protect against others */
+ p->p_runprio = 0;
p->p_flag = 0;
+ mtx_init(&p->p_mtx, IPL_SCHED);
/*
* Make a proc table entry for the new process.
@@ -169,13 +171,6 @@ thread_new(struct proc *parent, vaddr_t
*/
timeout_set(&p->p_sleep_to, endtsleep, p);
- /*
- * set priority of child to be that of parent
- * XXX should move p_estcpu into the region of struct proc which gets
- * copied.
- */
- scheduler_fork_hook(parent, p);
-
#ifdef WITNESS
p->p_sleeplocks = NULL;
#endif
@@ -328,9 +323,8 @@ fork_thread_start(struct proc *p, struct
int s;
SCHED_LOCK(s);
- p->p_stat = SRUN;
- p->p_cpu = sched_choosecpu_fork(parent, flags);
- setrunqueue(p);
+ p->p_cpu = parent->p_cpu;
+ setrunqueue(NULL, p, p->p_usrpri);
SCHED_UNLOCK(s);
}
Index: kern/kern_proc.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_proc.c,v
retrieving revision 1.85
diff -u -p -r1.85 kern_proc.c
--- kern/kern_proc.c 12 Nov 2018 15:09:17 -0000 1.85
+++ kern/kern_proc.c 1 Jun 2019 16:36:57 -0000
@@ -475,8 +475,8 @@ proc_printit(struct proc *p, const char
(*pr)("PROC (%s) pid=%d stat=%s\n", p->p_p->ps_comm, p->p_tid, pst);
(*pr)(" flags process=%b proc=%b\n",
p->p_p->ps_flags, PS_BITS, p->p_flag, P_BITS);
- (*pr)(" pri=%u, usrpri=%u, nice=%d\n",
- p->p_priority, p->p_usrpri, p->p_p->ps_nice);
+ (*pr)(" slpprio=%u, usrpri=%u, nice=%d\n",
+ p->p_slpprio, p->p_usrpri, p->p_p->ps_nice);
(*pr)(" forw=%p, list=%p,%p\n",
TAILQ_NEXT(p, p_runq), p->p_list.le_next, p->p_list.le_prev);
(*pr)(" process=%p user=%p, vmspace=%p\n",
Index: kern/kern_resource.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_resource.c,v
retrieving revision 1.63
diff -u -p -r1.63 kern_resource.c
--- kern/kern_resource.c 2 Jun 2019 03:58:28 -0000 1.63
+++ kern/kern_resource.c 2 Jun 2019 18:46:01 -0000
@@ -180,7 +180,6 @@ donice(struct proc *curp, struct process
{
struct ucred *ucred = curp->p_ucred;
struct proc *p;
- int s;
if (ucred->cr_uid != 0 && ucred->cr_ruid != 0 &&
ucred->cr_uid != chgpr->ps_ucred->cr_uid &&
@@ -193,11 +192,12 @@ donice(struct proc *curp, struct process
n += NZERO;
if (n < chgpr->ps_nice && suser(curp))
return (EACCES);
+ TAILQ_FOREACH(p, &chgpr->ps_threads, p_thr_link) {
+ mtx_enter(&p->p_mtx);
+ resetpriority(p, p->p_estcpu, n);
+ mtx_leave(&p->p_mtx);
+ }
chgpr->ps_nice = n;
- SCHED_LOCK(s);
- TAILQ_FOREACH(p, &chgpr->ps_threads, p_thr_link)
- (void)resetpriority(p);
- SCHED_UNLOCK(s);
return (0);
}
Index: kern/kern_sched.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sched.c,v
retrieving revision 1.58
diff -u -p -r1.58 kern_sched.c
--- kern/kern_sched.c 1 Jun 2019 14:11:17 -0000 1.58
+++ kern/kern_sched.c 1 Jun 2019 20:04:12 -0000
@@ -149,6 +149,7 @@ sched_idle(void *v)
cpuset_add(&sched_idle_cpus, ci);
p->p_stat = SSLEEP;
p->p_cpu = ci;
+ p->p_usrpri = MAXPRI;
atomic_setbits_int(&p->p_flag, P_CPUPEG);
mi_switch();
cpuset_del(&sched_idle_cpus, ci);
@@ -244,39 +245,59 @@ sched_init_runqueues(void)
}
void
-setrunqueue(struct proc *p)
+setrunqueue(struct cpu_info *ci, struct proc *p, uint8_t prio)
{
struct schedstate_percpu *spc;
- int queue = p->p_priority >> 2;
+ int queue = prio >> 2;
+
+ if (ci == NULL)
+ ci = sched_choosecpu(p);
+
+ KASSERT(ci != NULL);
+
+ p->p_cpu = ci;
+ p->p_stat = SRUN;
+ p->p_runprio = prio;
SCHED_ASSERT_LOCKED();
- spc = &p->p_cpu->ci_schedstate;
+
+ spc = &ci->ci_schedstate;
spc->spc_nrun++;
TAILQ_INSERT_TAIL(&spc->spc_qs[queue], p, p_runq);
spc->spc_whichqs |= (1 << queue);
- cpuset_add(&sched_queued_cpus, p->p_cpu);
+ cpuset_add(&sched_queued_cpus, ci);
- if (cpuset_isset(&sched_idle_cpus, p->p_cpu))
- cpu_unidle(p->p_cpu);
+ if (cpuset_isset(&sched_idle_cpus, ci))
+ cpu_unidle(ci);
+
+ if (prio < spc->spc_curpriority)
+ need_resched(ci);
}
-void
-remrunqueue(struct proc *p)
+uint8_t
+remrunqueue(struct cpu_info *ci, struct proc *p)
{
- struct schedstate_percpu *spc;
- int queue = p->p_priority >> 2;
+ struct schedstate_percpu *spc = &ci->ci_schedstate;
+ uint8_t prio = p->p_runprio;
+ int queue = prio >> 2;
SCHED_ASSERT_LOCKED();
- spc = &p->p_cpu->ci_schedstate;
+
spc->spc_nrun--;
TAILQ_REMOVE(&spc->spc_qs[queue], p, p_runq);
if (TAILQ_EMPTY(&spc->spc_qs[queue])) {
spc->spc_whichqs &= ~(1 << queue);
if (spc->spc_whichqs == 0)
- cpuset_del(&sched_queued_cpus, p->p_cpu);
+ cpuset_del(&sched_queued_cpus, ci);
}
+
+ KASSERT(p->p_stat == SRUN);
+ KASSERT(p->p_cpu == ci);
+ p->p_runprio = 0;
+
+ return (prio);
}
struct proc *
@@ -293,10 +314,12 @@ sched_chooseproc(void)
if (spc->spc_whichqs) {
for (queue = 0; queue < SCHED_NQS; queue++) {
while ((p = TAILQ_FIRST(&spc->spc_qs[queue]))) {
- remrunqueue(p);
- p->p_cpu = sched_choosecpu(p);
- setrunqueue(p);
- if (p->p_cpu == curcpu()) {
+ struct cpu_info *ci;
+ uint8_t prio;
+
+ prio = remrunqueue(p->p_cpu, p);
+ setrunqueue(NULL, p, prio);
+ if ((ci = p->p_cpu) == curcpu()) {
KASSERT(p->p_flag & P_CPUPEG);
goto again;
}
@@ -315,7 +338,7 @@ again:
if (spc->spc_whichqs) {
queue = ffs(spc->spc_whichqs) - 1;
p = TAILQ_FIRST(&spc->spc_qs[queue]);
- remrunqueue(p);
+ remrunqueue(p->p_cpu, p);
sched_noidle++;
KASSERT(p->p_stat == SRUN);
} else if ((p = sched_steal_proc(curcpu())) == NULL) {
@@ -337,66 +360,10 @@ again:
}
KASSERT(p);
p->p_stat = SRUN;
- }
-
- KASSERT(p->p_wchan == NULL);
- return (p);
-}
-
-struct cpu_info *
-sched_choosecpu_fork(struct proc *parent, int flags)
-{
-#ifdef MULTIPROCESSOR
- struct cpu_info *choice = NULL;
- fixpt_t load, best_load = ~0;
- int run, best_run = INT_MAX;
- struct cpu_info *ci;
- struct cpuset set;
-
-#if 0
- /*
- * XXX
- * Don't do this until we have a painless way to move the cpu in exec.
- * Preferably when nuking the old pmap and getting a new one on a
- * new cpu.
- */
- /*
- * PPWAIT forks are simple. We know that the parent will not
- * run until we exec and choose another cpu, so we just steal its
- * cpu.
- */
- if (flags & FORK_PPWAIT)
- return (parent->p_cpu);
-#endif
-
- /*
- * Look at all cpus that are currently idle and have nothing queued.
- * If there are none, pick the one with least queued procs first,
- * then the one with lowest load average.
- */
- cpuset_complement(&set, &sched_queued_cpus, &sched_idle_cpus);
- cpuset_intersection(&set, &set, &sched_all_cpus);
- if (cpuset_first(&set) == NULL)
- cpuset_copy(&set, &sched_all_cpus);
-
- while ((ci = cpuset_first(&set)) != NULL) {
- cpuset_del(&set, ci);
-
- load = ci->ci_schedstate.spc_ldavg;
- run = ci->ci_schedstate.spc_nrun;
-
- if (choice == NULL || run < best_run ||
- (run == best_run &&load < best_load)) {
- choice = ci;
- best_load = load;
- best_run = run;
- }
}
- return (choice);
-#else
- return (curcpu());
-#endif
+ KASSERT(p->p_wchan == NULL);
+ return (p);
}
struct cpu_info *
@@ -408,6 +375,8 @@ sched_choosecpu(struct proc *p)
struct cpu_info *ci;
struct cpuset set;
+ KASSERT(p->p_cpu != NULL);
+
/*
* If pegged to a cpu, don't allow it to move.
*/
@@ -509,8 +478,7 @@ sched_steal_proc(struct cpu_info *self)
if (best == NULL)
return (NULL);
- spc = &best->p_cpu->ci_schedstate;
- remrunqueue(best);
+ remrunqueue(best->p_cpu, best);
best->p_cpu = self;
sched_stolen++;
@@ -566,7 +534,7 @@ sched_proc_to_cpu_cost(struct cpu_info *
* and the higher the priority of the proc.
*/
if (!cpuset_isset(&sched_idle_cpus, ci)) {
- cost += (p->p_priority - spc->spc_curpriority) *
+ cost += (p->p_usrpri - spc->spc_curpriority) *
sched_cost_priority;
cost += sched_cost_runnable;
}
@@ -610,11 +578,8 @@ sched_peg_curproc(struct cpu_info *ci)
int s;
SCHED_LOCK(s);
- p->p_priority = p->p_usrpri;
- p->p_stat = SRUN;
- p->p_cpu = ci;
atomic_setbits_int(&p->p_flag, P_CPUPEG);
- setrunqueue(p);
+ setrunqueue(ci, p, p->p_usrpri);
p->p_ru.ru_nvcsw++;
mi_switch();
SCHED_UNLOCK(s);
Index: kern/kern_sig.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sig.c,v
retrieving revision 1.230
diff -u -p -r1.230 kern_sig.c
--- kern/kern_sig.c 13 May 2019 19:21:31 -0000 1.230
+++ kern/kern_sig.c 6 Jun 2019 18:14:25 -0000
@@ -890,6 +890,7 @@ ptsignal(struct proc *p, int signum, enu
struct process *pr = p->p_p;
struct proc *q;
int wakeparent = 0;
+ uint8_t stpprio = 0;
KERNEL_ASSERT_LOCKED();
@@ -1154,10 +1155,11 @@ runfast:
/*
* Raise priority to at least PUSER.
*/
- if (p->p_priority > PUSER)
- p->p_priority = PUSER;
+ stpprio = p->p_usrpri;
+ if (stpprio > PUSER)
+ stpprio = PUSER;
run:
- setrunnable(p);
+ setrunnable(p, stpprio ? stpprio : p->p_usrpri);
out:
SCHED_UNLOCK(s);
if (wakeparent)
@@ -1909,7 +1911,7 @@ userret(struct proc *p)
WITNESS_WARN(WARN_PANIC, NULL, "userret: returning");
- p->p_cpu->ci_schedstate.spc_curpriority = p->p_priority = p->p_usrpri;
+ p->p_cpu->ci_schedstate.spc_curpriority = p->p_usrpri;
}
int
@@ -1995,7 +1997,7 @@ single_thread_set(struct proc *p, enum s
if (mode == SINGLE_EXIT) {
SCHED_LOCK(s);
if (q->p_stat == SSTOP) {
- setrunnable(q);
+ setrunnable(q, q->p_usrpri);
pr->ps_singlecount++;
}
SCHED_UNLOCK(s);
@@ -2019,13 +2021,13 @@ single_thread_set(struct proc *p, enum s
break;
}
/* need to unwind or exit, so wake it */
- setrunnable(q);
+ setrunnable(q, q->p_slpprio);
}
pr->ps_singlecount++;
break;
case SSTOP:
if (mode == SINGLE_EXIT) {
- setrunnable(q);
+ setrunnable(q, q->p_usrpri);
pr->ps_singlecount++;
}
break;
@@ -2050,7 +2052,7 @@ single_thread_wait(struct process *pr)
{
/* wait until they're all suspended */
while (pr->ps_singlecount > 0)
- tsleep(&pr->ps_singlecount, PUSER, "suspend", 0);
+ tsleep(&pr->ps_singlecount, PWAIT, "suspend", 0);
}
void
@@ -2079,7 +2081,7 @@ single_thread_clear(struct proc *p, int
SCHED_LOCK(s);
if (q->p_stat == SSTOP && (q->p_flag & flag) == 0) {
if (q->p_wchan == 0)
- setrunnable(q);
+ setrunnable(q, q->p_usrpri);
else
q->p_stat = SSLEEP;
}
Index: kern/kern_synch.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_synch.c,v
retrieving revision 1.148
diff -u -p -r1.148 kern_synch.c
--- kern/kern_synch.c 23 Apr 2019 13:35:12 -0000 1.148
+++ kern/kern_synch.c 1 Jun 2019 17:52:51 -0000
@@ -280,8 +280,10 @@ sleep_setup(struct sleep_state *sls, con
p->p_wchan = ident;
p->p_wmesg = wmesg;
+ mtx_enter(&p->p_mtx);
p->p_slptime = 0;
- p->p_priority = prio & PRIMASK;
+ mtx_leave(&p->p_mtx);
+ p->p_slpprio = prio & PRIMASK;
TAILQ_INSERT_TAIL(&slpque[LOOKUP(ident)], p, p_runq);
}
@@ -304,7 +306,6 @@ sleep_finish(struct sleep_state *sls, in
panic("sleep_finish !SONPROC");
#endif
- p->p_cpu->ci_schedstate.spc_curpriority = p->p_usrpri;
SCHED_UNLOCK(sls->sls_s);
/*
@@ -399,7 +400,7 @@ endtsleep(void *arg)
SCHED_LOCK(s);
if (p->p_wchan) {
if (p->p_stat == SSLEEP)
- setrunnable(p);
+ setrunnable(p, p->p_slpprio);
else
unsleep(p);
atomic_setbits_int(&p->p_flag, P_TIMEOUT);
@@ -454,7 +455,7 @@ wakeup_n(const volatile void *ident, int
p->p_wchan = 0;
TAILQ_REMOVE(qp, p, p_runq);
if (p->p_stat == SSLEEP)
- setrunnable(p);
+ setrunnable(p, p->p_slpprio);
}
}
SCHED_UNLOCK(s);
@@ -473,6 +474,7 @@ int
sys_sched_yield(struct proc *p, void *v, register_t *retval)
{
struct proc *q;
+ uint8_t newprio;
int s;
SCHED_LOCK(s);
@@ -481,11 +483,10 @@ sys_sched_yield(struct proc *p, void *v,
* sched_yield(2), drop its priority to ensure its siblings
* can make some progress.
*/
- p->p_priority = p->p_usrpri;
+ newprio = p->p_usrpri;
TAILQ_FOREACH(q, &p->p_p->ps_threads, p_thr_link)
- p->p_priority = max(p->p_priority, q->p_priority);
- p->p_stat = SRUN;
- setrunqueue(p);
+ newprio = max(newprio, q->p_runprio);
+ setrunqueue(p->p_cpu, p, newprio);
p->p_ru.ru_nvcsw++;
mi_switch();
SCHED_UNLOCK(s);
@@ -571,7 +572,7 @@ thrsleep(struct proc *p, struct sys___th
void *sleepaddr = &p->p_thrslpid;
if (ident == -1)
sleepaddr = &globalsleepaddr;
- error = tsleep(sleepaddr, PUSER | PCATCH, "thrsleep",
+ error = tsleep(sleepaddr, PWAIT | PCATCH, "thrsleep",
(int)to_ticks);
}
Index: kern/sched_bsd.c
===================================================================
RCS file: /cvs/src/sys/kern/sched_bsd.c,v
retrieving revision 1.53
diff -u -p -r1.53 sched_bsd.c
--- kern/sched_bsd.c 1 Jun 2019 14:11:17 -0000 1.53
+++ kern/sched_bsd.c 1 Jun 2019 21:26:33 -0000
@@ -61,8 +61,8 @@ int rrticks_init; /* # of hardclock tic
struct __mp_lock sched_lock;
#endif
-void schedcpu(void *);
-void updatepri(struct proc *);
+void schedcpu(void *);
+uint32_t decay_aftersleep(struct proc *, uint32_t, uint32_t);
void
scheduler_start(void)
@@ -206,6 +206,7 @@ schedcpu(void *arg)
struct proc *p;
int s;
unsigned int newcpu;
+ uint8_t newprio;
int phz;
/*
@@ -228,6 +229,7 @@ schedcpu(void *arg)
/*
* Increment sleep time (if sleeping). We ignore overflow.
*/
+ mtx_enter(&p->p_mtx);
if (p->p_stat == SSLEEP || p->p_stat == SSTOP)
p->p_slptime++;
p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
@@ -235,9 +237,10 @@ schedcpu(void *arg)
* If the process has slept the entire second,
* stop recalculating its priority until it wakes up.
*/
- if (p->p_slptime > 1)
+ if (p->p_slptime > 1) {
+ mtx_leave(&p->p_mtx);
continue;
- SCHED_LOCK(s);
+ }
/*
* p_pctcpu is only for diagnostic tools such as ps.
*/
@@ -252,19 +255,26 @@ schedcpu(void *arg)
#endif
p->p_cpticks = 0;
newcpu = (u_int) decay_cpu(loadfac, p->p_estcpu);
- p->p_estcpu = newcpu;
- resetpriority(p);
- if (p->p_priority >= PUSER) {
- if (p->p_stat == SRUN &&
- (p->p_priority / SCHED_PPQ) !=
- (p->p_usrpri / SCHED_PPQ)) {
- remrunqueue(p);
- p->p_priority = p->p_usrpri;
- setrunqueue(p);
- } else
- p->p_priority = p->p_usrpri;
+ newprio = resetpriority(p, newcpu, p->p_p->ps_nice);
+ mtx_leave(&p->p_mtx);
+
+ if (p->p_stat == SRUN) {
+ SCHED_LOCK(s);
+ if (p->p_stat == SRUN) {
+ struct schedstate_percpu *spc;
+ uint8_t runprio;
+
+ spc = &p->p_cpu->ci_schedstate;
+ runprio = p->p_runprio;
+ if ((runprio >= PUSER) &&
+ (SRUNQ(runprio) != SRUNQ(newprio))) {
+ remrunqueue(p->p_cpu, p);
+ setrunqueue(p->p_cpu, p, newprio);
+ } else if (newprio < spc->spc_curpriority)
+ need_resched(p->p_cpu);
+ }
+ SCHED_UNLOCK(s);
}
- SCHED_UNLOCK(s);
}
uvm_meter();
wakeup(&lbolt);
@@ -276,23 +286,23 @@ schedcpu(void *arg)
* For all load averages >= 1 and max p_estcpu of 255, sleeping for at
* least six times the loadfactor will decay p_estcpu to zero.
*/
-void
-updatepri(struct proc *p)
+uint32_t
+decay_aftersleep(struct proc *p, uint32_t estcpu, uint32_t slptime)
{
- unsigned int newcpu = p->p_estcpu;
fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
+ uint32_t newcpu;
- SCHED_ASSERT_LOCKED();
-
- if (p->p_slptime > 5 * loadfac)
- p->p_estcpu = 0;
+ if (slptime > 5 * loadfac)
+ newcpu = 0;
else {
- p->p_slptime--; /* the first time was done in schedcpu */
- while (newcpu && --p->p_slptime)
- newcpu = (int) decay_cpu(loadfac, newcpu);
- p->p_estcpu = newcpu;
+ newcpu = estcpu;
+ slptime--; /* the first time was done in schedcpu */
+ while (newcpu && --slptime)
+ newcpu = decay_cpu(loadfac, newcpu);
+
}
- resetpriority(p);
+
+ return (newcpu);
}
/*
@@ -308,9 +318,7 @@ yield(void)
NET_ASSERT_UNLOCKED();
SCHED_LOCK(s);
- p->p_priority = p->p_usrpri;
- p->p_stat = SRUN;
- setrunqueue(p);
+ setrunqueue(p->p_cpu, p, p->p_usrpri);
p->p_ru.ru_nvcsw++;
mi_switch();
SCHED_UNLOCK(s);
@@ -329,9 +337,7 @@ preempt(void)
int s;
SCHED_LOCK(s);
- p->p_priority = p->p_usrpri;
- p->p_stat = SRUN;
- setrunqueue(p);
+ setrunqueue(p->p_cpu, p, p->p_usrpri);
p->p_ru.ru_nivcsw++;
mi_switch();
SCHED_UNLOCK(s);
@@ -427,7 +433,9 @@ mi_switch(void)
*/
KASSERT(p->p_cpu == curcpu());
- nanouptime(&p->p_cpu->ci_schedstate.spc_runtime);
+ spc = &curcpu()->ci_schedstate;
+ spc->spc_curpriority = p->p_usrpri;
+ nanouptime(&spc->spc_runtime);
#ifdef MULTIPROCESSOR
/*
@@ -441,36 +449,13 @@ mi_switch(void)
#endif
}
-static __inline void
-resched_proc(struct proc *p, u_char pri)
-{
- struct cpu_info *ci;
-
- /*
- * XXXSMP
- * This does not handle the case where its last
- * CPU is running a higher-priority process, but every
- * other CPU is running a lower-priority process. There
- * are ways to handle this situation, but they're not
- * currently very pretty, and we also need to weigh the
- * cost of moving a process from one CPU to another.
- *
- * XXXSMP
- * There is also the issue of locking the other CPU's
- * sched state, which we currently do not do.
- */
- ci = (p->p_cpu != NULL) ? p->p_cpu : curcpu();
- if (pri < ci->ci_schedstate.spc_curpriority)
- need_resched(ci);
-}
-
/*
* Change process state to be runnable,
* placing it on the run queue if it is in memory,
* and awakening the swapper if it isn't in memory.
*/
void
-setrunnable(struct proc *p)
+setrunnable(struct proc *p, uint8_t slpprio)
{
SCHED_ASSERT_LOCKED();
@@ -493,13 +478,18 @@ setrunnable(struct proc *p)
unsleep(p); /* e.g. when sending signals */
break;
}
- p->p_stat = SRUN;
- p->p_cpu = sched_choosecpu(p);
- setrunqueue(p);
- if (p->p_slptime > 1)
- updatepri(p);
+ /* Put the process on any runqueue using its sleeping priority. */
+ setrunqueue(NULL, p, slpprio);
+
+ mtx_enter(&p->p_mtx);
+ if (p->p_slptime > 1) {
+ uint32_t newcpu;
+
+ newcpu = decay_aftersleep(p, p->p_estcpu, p->p_slptime);
+ resetpriority(p, newcpu, p->p_p->ps_nice);
+ }
p->p_slptime = 0;
- resched_proc(p, p->p_priority);
+ mtx_leave(&p->p_mtx);
}
/*
@@ -507,18 +497,18 @@ setrunnable(struct proc *p)
* Arrange to reschedule if the resulting priority is better
* than that of the current process.
*/
-void
-resetpriority(struct proc *p)
+uint8_t
+resetpriority(struct proc *p, uint32_t newcpu, uint8_t nice)
{
- unsigned int newpriority;
+ unsigned int newprio;
- SCHED_ASSERT_LOCKED();
+ newprio = min((PUSER + newcpu + NICE_WEIGHT * (nice - NZERO)), MAXPRI);
+
+ MUTEX_ASSERT_LOCKED(&p->p_mtx);
+ p->p_estcpu = newcpu;
+ p->p_usrpri = newprio;
- newpriority = PUSER + p->p_estcpu +
- NICE_WEIGHT * (p->p_p->ps_nice - NZERO);
- newpriority = min(newpriority, MAXPRI);
- p->p_usrpri = newpriority;
- resched_proc(p, p->p_usrpri);
+ return (newprio);
}
/*
@@ -540,17 +530,17 @@ schedclock(struct proc *p)
{
struct cpu_info *ci = curcpu();
struct schedstate_percpu *spc = &ci->ci_schedstate;
- int s;
+ uint32_t newcpu;
if (p == spc->spc_idleproc || spc->spc_spinning)
return;
- SCHED_LOCK(s);
- p->p_estcpu = ESTCPULIM(p->p_estcpu + 1);
- resetpriority(p);
- if (p->p_priority >= PUSER)
- p->p_priority = p->p_usrpri;
- SCHED_UNLOCK(s);
+ /* Only decay the priority if nobody is messing with it. */
+ if (!mtx_enter_try(&p->p_mtx))
+ return;
+ newcpu = ESTCPULIM(p->p_estcpu + 1);
+ resetpriority(p, newcpu, p->p_p->ps_nice);
+ mtx_leave(&p->p_mtx);
}
void (*cpu_setperf)(int);
Index: kern/sys_futex.c
===================================================================
RCS file: /cvs/src/sys/kern/sys_futex.c,v
retrieving revision 1.12
diff -u -p -r1.12 sys_futex.c
--- kern/sys_futex.c 6 Feb 2019 15:11:20 -0000 1.12
+++ kern/sys_futex.c 1 Jun 2019 16:04:57 -0000
@@ -254,7 +254,7 @@ futex_wait(uint32_t *uaddr, uint32_t val
TAILQ_INSERT_TAIL(&f->ft_threads, p, p_fut_link);
p->p_futex = f;
- error = rwsleep(p, &ftlock, PUSER|PCATCH, "fsleep", (int)to_ticks);
+ error = rwsleep(p, &ftlock, PWAIT | PCATCH, "fsleep", (int)to_ticks);
if (error == ERESTART)
error = ECANCELED;
else if (error == EWOULDBLOCK) {
Index: kern/sys_generic.c
===================================================================
RCS file: /cvs/src/sys/kern/sys_generic.c,v
retrieving revision 1.123
diff -u -p -r1.123 sys_generic.c
--- kern/sys_generic.c 21 Jan 2019 23:41:26 -0000 1.123
+++ kern/sys_generic.c 1 Jun 2019 15:59:16 -0000
@@ -806,7 +806,7 @@ selwakeup(struct selinfo *sip)
SCHED_LOCK(s);
if (p->p_wchan == (caddr_t)&selwait) {
if (p->p_stat == SSLEEP)
- setrunnable(p);
+ setrunnable(p, p->p_slpprio);
else
unsleep(p);
} else if (p->p_flag & P_SELECT)
Index: kern/sys_process.c
===================================================================
RCS file: /cvs/src/sys/kern/sys_process.c,v
retrieving revision 1.80
diff -u -p -r1.80 sys_process.c
--- kern/sys_process.c 19 Feb 2018 09:25:13 -0000 1.80
+++ kern/sys_process.c 1 Jun 2019 16:18:41 -0000
@@ -493,7 +493,7 @@ ptrace_ctrl(struct proc *p, int req, pid
if (t->p_stat == SSTOP) {
t->p_xstat = data;
SCHED_LOCK(s);
- setrunnable(t);
+ setrunnable(t, t->p_usrpri);
SCHED_UNLOCK(s);
} else {
if (data != 0)
Index: kern/vfs_sync.c
===================================================================
RCS file: /cvs/src/sys/kern/vfs_sync.c,v
retrieving revision 1.60
diff -u -p -r1.60 vfs_sync.c
--- kern/vfs_sync.c 13 Aug 2018 15:26:17 -0000 1.60
+++ kern/vfs_sync.c 1 Jun 2019 16:20:49 -0000
@@ -245,7 +245,7 @@ speedup_syncer(void)
SCHED_LOCK(s);
if (syncerproc && syncerproc->p_wchan == &lbolt)
- setrunnable(syncerproc);
+ setrunnable(syncerproc, syncerproc->p_usrpri);
SCHED_UNLOCK(s);
if (rushjob < syncdelay / 2) {
rushjob += 1;
Index: sys/proc.h
===================================================================
RCS file: /cvs/src/sys/sys/proc.h,v
retrieving revision 1.268
diff -u -p -r1.268 proc.h
--- sys/proc.h 1 Jun 2019 22:42:18 -0000 1.268
+++ sys/proc.h 2 Jun 2019 18:50:17 -0000
@@ -307,6 +307,7 @@ struct p_inentry {
/*
* Locks used to protect struct members in this file:
* s scheduler lock
+ * m `p_mtx'
*/
struct proc {
TAILQ_ENTRY(proc) p_runq; /* [s] current run/sleep queue */
@@ -317,6 +318,7 @@ struct proc {
TAILQ_ENTRY(proc) p_fut_link; /* Threads in a futex linkage. */
struct futex *p_futex; /* Current sleeping futex. */
+ struct mutex p_mtx;
/* substructures: */
struct filedesc *p_fd; /* copy of p_p->ps_fd */
@@ -328,7 +330,7 @@ struct proc {
int p_flag; /* P_* flags. */
u_char p_spare; /* unused */
char p_stat; /* [s] S* process status. */
- char p_pad1[1];
+ uint8_t p_runprio; /* [s] priority in SRUN. */
u_char p_descfd; /* if not 255, fdesc permits this fd */
pid_t p_tid; /* Thread identifier. */
@@ -341,13 +343,12 @@ struct proc {
long p_thrslpid; /* for thrsleep syscall */
/* scheduling */
- u_int p_estcpu; /* [s] Time averaged val of p_cpticks */
int p_cpticks; /* Ticks of cpu time. */
const volatile void *p_wchan; /* [s] Sleep address. */
struct timeout p_sleep_to;/* timeout for tsleep() */
const char *p_wmesg; /* [s] Reason for sleep. */
- fixpt_t p_pctcpu; /* [s] %cpu for this thread */
- u_int p_slptime; /* [s] Time since last blocked. */
+ fixpt_t p_pctcpu; /* [m] %cpu for this thread */
+ u_int p_slptime; /* [m] Time since last blocked. */
u_int p_uticks; /* Statclock hits in user mode. */
u_int p_sticks; /* Statclock hits in system mode. */
u_int p_iticks; /* Statclock hits processing intr. */
@@ -366,8 +367,13 @@ struct proc {
#define p_startcopy p_sigmask
sigset_t p_sigmask; /* Current signal mask. */
- u_char p_priority; /* [s] Process priority. */
- u_char p_usrpri; /* [s] User-prio based on p_estcpu & ps_nice. */
+ u_int p_spserial;
+ vaddr_t p_spstart;
+ vaddr_t p_spend;
+
+ u_char p_slpprio; /* [s] Sleeping priority. */
+ u_char p_usrpri; /* [m] Priority based on p_estcpu & ps_nice. */
+ u_int p_estcpu; /* [m] Time averaged val of p_cpticks */
int p_pledge_syscall; /* Cache of current syscall */
struct ucred *p_ucred; /* cached credentials */
@@ -550,8 +556,8 @@ void leavepgrp(struct process *);
void killjobc(struct process *);
void preempt(void);
void procinit(void);
-void resetpriority(struct proc *);
-void setrunnable(struct proc *);
+uint8_t resetpriority(struct proc *, uint32_t, uint8_t);
+void setrunnable(struct proc *, uint8_t);
void endtsleep(void *);
void unsleep(struct proc *);
void reaper(void *);
Index: sys/sched.h
===================================================================
RCS file: /cvs/src/sys/sys/sched.h,v
retrieving revision 1.52
diff -u -p -r1.52 sched.h
--- sys/sched.h 16 May 2019 13:52:47 -0000 1.52
+++ sys/sched.h 1 Jun 2019 21:14:35 -0000
@@ -137,6 +137,7 @@ struct cpustats {
#define SPCF_SHOULDHALT 0x0004 /* CPU should be vacated */
#define SPCF_HALTED 0x0008 /* CPU has been halted */
+#define SRUNQ(prio) ((prio) / SCHED_PPQ)
#define SCHED_PPQ (128 / SCHED_NQS) /* priorities per queue */
#define NICE_WEIGHT 2 /* priorities per nice level */
#define ESTCPULIM(e) min((e), NICE_WEIGHT * PRIO_MAX - SCHED_PPQ)
@@ -179,13 +180,8 @@ void sched_stop_secondary_cpus(void);
int cpu_is_online(struct cpu_info *);
void sched_init_runqueues(void);
-void setrunqueue(struct proc *);
-void remrunqueue(struct proc *);
-
-/* Inherit the parent's scheduler history */
-#define scheduler_fork_hook(parent, child) do { \
- (child)->p_estcpu = (parent)->p_estcpu; \
-} while (0)
+void setrunqueue(struct cpu_info *, struct proc *, uint8_t);
+uint8_t remrunqueue(struct cpu_info *, struct proc *);
/* Chargeback parents for the sins of their children. */
#define scheduler_wait_hook(parent, child) do { \
Index: sys/sysctl.h
===================================================================
RCS file: /cvs/src/sys/sys/sysctl.h,v
retrieving revision 1.188
diff -u -p -r1.188 sysctl.h
--- sys/sysctl.h 1 Jun 2019 14:11:18 -0000 1.188
+++ sys/sysctl.h 1 Jun 2019 16:36:13 -0000
@@ -629,7 +629,7 @@ do { \
(kp)->p_stat = (p)->p_stat; \
(kp)->p_slptime = (p)->p_slptime; \
(kp)->p_holdcnt = 1; \
- (kp)->p_priority = (p)->p_priority; \
+ (kp)->p_priority = (p)->p_usrpri + PZERO; \
(kp)->p_usrpri = (p)->p_usrpri; \
if ((p)->p_wchan && (p)->p_wmesg) \
copy_str((kp)->p_wmesg, (p)->p_wmesg, \