Hi,

This is a pretty cool diff, splitting the sleep prio and the run prio!

In a few places the documentation comments could be changed from "process" to
"proc"; I tried to find and mark them below. The old wording leaves the reader
confused for a moment.

thanks

> > - `spc_curpriority' is now updated at every context-switch.  That means
> >    need_resched() won't be called after comparing an out-of-date value.
> >    At the same time, `p_usrpri' is initialized to the highest possible
> >    value for idle threads.
> > - resched_proc() was calling need_resched() in the following conditions:
> >    - If the SONPROC thread has a higher priority than the current
> >      running thread (itself).
> >    - Twice in setrunnable() when we know that p_priority <= p_usrpri.
> >    - If schedcpu() considered that a thread, after updating its prio,
> >      should preempt the one running on the CPU pointed by `p_cpu'. 
> > 
> >   The diff below simplifies all of that by calling need_resched() when:
> >    - A thread is inserted in a CPU runqueue at a higher priority than
> >      the one SONPROC.
> >    - schedcpu() decides that a thread in SRUN state should preempt the
> >      one SONPROC.

Just FYI, this should fix a serious bug: the resched_proc() call compared a
stale priority when deciding what to schedule, and it consistently made a
pretty bad decision!
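
To make the failure mode concrete, here is a toy userspace sketch (all names
are invented; only the `prio < spc_curpriority' comparison mirrors the kernel,
where a lower numeric value means a higher priority, and before this diff
spc_curpriority was only refreshed on the userret()/sleep_finish() paths):

#include <stdio.h>

struct toy_spc {
	int spc_curpriority;	/* priority of the thread on the CPU */
};

static int
should_preempt(struct toy_spc *spc, int candidate_prio)
{
	return (candidate_prio < spc->spc_curpriority);
}

int
main(void)
{
	struct toy_spc spc = { .spc_curpriority = 90 };	/* stale leftover */
	int running_prio = 50;		/* what is really on the CPU now */
	int candidate_prio = 70;

	/* old scheme: the stale value says "preempt", although 70 > 50 */
	printf("stale: %d\n", should_preempt(&spc, candidate_prio));

	/* new scheme: spc_curpriority updated at every context switch */
	spc.spc_curpriority = running_prio;
	printf("fresh: %d\n", should_preempt(&spc, candidate_prio));
	return (0);
}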

> > - `p_estcpu' `p_usrpri' and `p_slptime' which represent the "priority"
> >   of a thread are now updated while holding a per-thread mutex.  As a
> >   result schedclock() and donice() no longer take the SCHED_LOCK(),
> >   and schedcpu() almost never takes it.

You forgot to mention resetpriority(), which was also moved out of the
SCHED_LOCK()!

> > 
> > - With this diff top(1) and ps(1) will report the "real" `p_usrpri' value
> >   when displaying priorities.  This is helpful to understand what's
> >   happening:
> > 
> > load averages:  0.99,  0.56,  0.25               two.lab.grenadille.net 23:42:10
> > 70 threads: 68 idle, 2 on processor                                    up  0:09
> > CPU0:  0.0% user,  0.0% nice, 51.0% sys,  2.0% spin,  0.0% intr, 47.1% idle
> > CPU1:  2.0% user,  0.0% nice, 51.0% sys,  3.9% spin,  0.0% intr, 43.1% idle
> > Memory: Real: 47M/1005M act/tot Free: 2937M Cache: 812M Swap: 0K/4323M
> > 
> >   PID      TID PRI NICE  SIZE   RES STATE     WAIT      TIME    CPU COMMAND
> > 81000   145101  72    0    0K 1664K sleep/1   bored     1:15 36.96% softnet
> > 47133   244097  73    0 2984K 4408K sleep/1   netio     1:06 35.06% cvs 
> > 64749   522184  66    0  176K  148K onproc/1  -         0:55 28.81% nfsd
> > 21615   602473 127    0    0K 1664K sleep/0   -         7:22  0.00% idle0  
> > 12413   606242 127    0    0K 1664K sleep/1   -         7:08  0.00% idle1
> > 85778   338258  50    0 4936K 7308K idle      select    0:10  0.00% ssh  
> > 22771   575513  50    0  176K  148K sleep/0   nfsd      0:02  0.00% nfsd 
> > ....
> > 
> > 
> > - The removal of `p_priority' and the change that makes mi_switch()
> >   always update `spc_curpriority' might introduce some changes in
> >   behavior, especially with kernel threads that were not going through
> >   tsleep(9).  We currently have some situations where the priority of
> >   the running thread isn't correctly reflected.  This diff changes that
> >   which means we should be able to better understand where the problems
> >   are.
> > 
> > I'd be interested in comments/tests/reviews before continuing in this
> > direction.  Note that at least parts of this diff are required to split
> > the accounting apart from the SCHED_LOCK() as well.
> > 
> > I'll also work on exporting scheduler statistics unless somebody wants
> > to beat me :)
> 
> Updated diff to use IPL_SCHED and rebased to apply on top of -current :) 
> 
> Index: arch/amd64/amd64/genassym.cf
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/amd64/genassym.cf,v
> retrieving revision 1.40
> diff -u -p -r1.40 genassym.cf
> --- arch/amd64/amd64/genassym.cf      17 May 2019 19:07:15 -0000      1.40
> +++ arch/amd64/amd64/genassym.cf      1 Jun 2019 16:27:46 -0000
> @@ -32,7 +32,6 @@ export      VM_MIN_KERNEL_ADDRESS
>  
>  struct       proc
>  member       p_addr
> -member       p_priority
>  member       p_stat
>  member       p_wchan
>  member       P_MD_REGS       p_md.md_regs
> Index: arch/hppa/hppa/genassym.cf
> ===================================================================
> RCS file: /cvs/src/sys/arch/hppa/hppa/genassym.cf,v
> retrieving revision 1.47
> diff -u -p -r1.47 genassym.cf
> --- arch/hppa/hppa/genassym.cf        9 Feb 2015 08:20:13 -0000       1.47
> +++ arch/hppa/hppa/genassym.cf        1 Jun 2019 17:21:44 -0000
> @@ -130,7 +130,6 @@ member    tf_cr30
>  # proc fields and values
>  struct       proc
>  member       p_addr
> -member       p_priority
>  member       p_stat
>  member       p_wchan
>  member       p_md
> Index: arch/i386/i386/esm.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/i386/i386/esm.c,v
> retrieving revision 1.59
> diff -u -p -r1.59 esm.c
> --- arch/i386/i386/esm.c      8 Sep 2015 07:12:56 -0000       1.59
> +++ arch/i386/i386/esm.c      1 Jun 2019 16:05:18 -0000
> @@ -331,7 +331,7 @@ esm_watchdog(void *arg, int period)
>        * should have a process context we can sleep in.
>        */
>       while (sc->sc_step != 0) {
> -             if (tsleep(sc, PUSER | PCATCH, "esm", 0) == EINTR) {
> +             if (tsleep(sc, PWAIT | PCATCH, "esm", 0) == EINTR) {
>                       splx(s);
>                       return (sc->sc_wdog_period);
>               }
> Index: arch/i386/i386/genassym.cf
> ===================================================================
> RCS file: /cvs/src/sys/arch/i386/i386/genassym.cf,v
> retrieving revision 1.47
> diff -u -p -r1.47 genassym.cf
> --- arch/i386/i386/genassym.cf        22 Jun 2018 13:21:14 -0000      1.47
> +++ arch/i386/i386/genassym.cf        1 Jun 2019 16:27:58 -0000
> @@ -72,7 +72,6 @@ export      VM_MAXUSER_ADDRESS
>  # proc fields and values
>  struct       proc
>  member       p_addr
> -member       p_priority
>  member       p_stat
>  member       p_wchan
>  member       p_vmspace
> Index: arch/m88k/m88k/m88k_machdep.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/m88k/m88k/m88k_machdep.c,v
> retrieving revision 1.69
> diff -u -p -r1.69 m88k_machdep.c
> --- arch/m88k/m88k/m88k_machdep.c     22 Oct 2018 17:31:24 -0000      1.69
> +++ arch/m88k/m88k/m88k_machdep.c     1 Jun 2019 16:28:43 -0000
> @@ -564,9 +564,7 @@ cpu_emergency_disable()
>                * to mi_switch().
>                */
>               SCHED_LOCK(s);
> -             p->p_priority = p->p_usrpri;
> -             p->p_stat = SRUN;
> -             setrunqueue(p);
> +             setrunqueue(p->p_cpu, p, p->p_usrpri);
>               p->p_ru.ru_nvcsw++;
>               SCHED_UNLOCK(s);
>       }
> Index: arch/sparc64/sparc64/db_interface.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/sparc64/sparc64/db_interface.c,v
> retrieving revision 1.51
> diff -u -p -r1.51 db_interface.c
> --- arch/sparc64/sparc64/db_interface.c       23 Mar 2019 05:47:23 -0000      1.51
> +++ arch/sparc64/sparc64/db_interface.c       1 Jun 2019 17:22:32 -0000
> @@ -964,10 +964,10 @@ db_proc_cmd(addr, have_addr, count, modi
>               return;
>       }
>       db_printf("process %p:", p);
> -     db_printf("pid:%d vmspace:%p pmap:%p ctx:%x wchan:%p pri:%d upri:%d\n",
> +     db_printf("pid:%d vmspace:%p pmap:%p ctx:%x wchan:%p spri:%d upri:%d\n",
>           p->p_p->ps_pid, p->p_vmspace, p->p_vmspace->vm_map.pmap,
>           p->p_vmspace->vm_map.pmap->pm_ctx,
> -         p->p_wchan, p->p_priority, p->p_usrpri);
> +         p->p_wchan, p->p_slpprio, p->p_usrpri);
>       db_printf("maxsaddr:%p ssiz:%dpg or %llxB\n",
>           p->p_vmspace->vm_maxsaddr, p->p_vmspace->vm_ssize,
>           (unsigned long long)ptoa(p->p_vmspace->vm_ssize));
> Index: dev/pci/drm/drm_linux.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/drm/drm_linux.c,v
> retrieving revision 1.36
> diff -u -p -r1.36 drm_linux.c
> --- dev/pci/drm/drm_linux.c   11 May 2019 17:13:59 -0000      1.36
> +++ dev/pci/drm/drm_linux.c   1 Jun 2019 16:29:20 -0000
> @@ -116,7 +116,7 @@ wake_up_process(struct proc *p)
>       atomic_cas_ptr(&sch_proc, p, NULL);
>       if (p->p_wchan) {
>               if (p->p_stat == SSLEEP) {
> -                     setrunnable(p);
> +                     setrunnable(p, p->p_slpprio);
>                       r = 1;
>               } else
>                       unsleep(p);
> Index: dev/pci/drm/i915/intel_breadcrumbs.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/drm/i915/intel_breadcrumbs.c,v
> retrieving revision 1.1
> diff -u -p -r1.1 intel_breadcrumbs.c
> --- dev/pci/drm/i915/intel_breadcrumbs.c      14 Apr 2019 10:14:52 -0000      1.1
> +++ dev/pci/drm/i915/intel_breadcrumbs.c      1 Jun 2019 16:30:43 -0000
> @@ -451,7 +451,7 @@ static bool __intel_engine_add_wait(stru
>  #ifdef __linux__
>                       if (wait->tsk->prio > to_wait(parent)->tsk->prio) {
>  #else
> -                     if (wait->tsk->p_priority > to_wait(parent)->tsk->p_priority) {
> +                     if (wait->tsk->p_usrpri > to_wait(parent)->tsk->p_usrpri) {
>  #endif
>                               p = &parent->rb_right;
>                               first = false;
> @@ -538,7 +538,7 @@ static inline bool chain_wakeup(struct r
>  #else
>  static inline bool chain_wakeup(struct rb_node *rb, int priority)
>  {
> -     return rb && to_wait(rb)->tsk->p_priority <= priority;
> +     return rb && to_wait(rb)->tsk->p_usrpri <= priority;
>  }
>  #endif
>  
> @@ -558,7 +558,7 @@ static inline int wakeup_priority(struct
>       if (p == b->signaler)
>               return INT_MIN;
>       else
> -             return p->p_priority;
> +             return p->p_usrpri;
>  }
>  #endif
>  
> Index: kern/init_main.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/init_main.c,v
> retrieving revision 1.288
> diff -u -p -r1.288 init_main.c
> --- kern/init_main.c  2 Jun 2019 03:58:28 -0000       1.288
> +++ kern/init_main.c  2 Jun 2019 18:46:01 -0000
> @@ -200,6 +200,7 @@ main(void *framep)
>        */
>       curproc = p = &proc0;
>       p->p_cpu = curcpu();
> +     mtx_init(&p->p_mtx, IPL_SCHED);
>  
>       /*
>        * Initialize timeouts.
> Index: kern/kern_exit.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_exit.c,v
> retrieving revision 1.176
> diff -u -p -r1.176 kern_exit.c
> --- kern/kern_exit.c  1 Jun 2019 14:11:17 -0000       1.176
> +++ kern/kern_exit.c  1 Jun 2019 16:04:29 -0000
> @@ -164,7 +164,7 @@ exit1(struct proc *p, int rv, int flags)
>       if ((p->p_flag & P_THREAD) == 0) {
>               /* main thread gotta wait because it has the pid, et al */
>               while (pr->ps_refcnt > 1)
> -                     tsleep(&pr->ps_threads, PUSER, "thrdeath", 0);
> +                     tsleep(&pr->ps_threads, PWAIT, "thrdeath", 0);
>               if (pr->ps_flags & PS_PROFIL)
>                       stopprofclock(pr);
>       }
> Index: kern/kern_fork.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_fork.c,v
> retrieving revision 1.212
> diff -u -p -r1.212 kern_fork.c
> --- kern/kern_fork.c  1 Jun 2019 14:11:17 -0000       1.212
> +++ kern/kern_fork.c  2 Jun 2019 18:41:40 -0000
> @@ -146,11 +146,13 @@ sys___tfork(struct proc *p, void *v, reg
>  struct proc *
>  thread_new(struct proc *parent, vaddr_t uaddr)
>  {
> -     struct proc *p; 
> +     struct proc *p;
>  
>       p = pool_get(&proc_pool, PR_WAITOK);
>       p->p_stat = SIDL;                       /* protect against others */
> +     p->p_runprio = 0;
>       p->p_flag = 0;
> +     mtx_init(&p->p_mtx, IPL_SCHED);
>  
>       /*
>        * Make a proc table entry for the new process.
> @@ -169,13 +171,6 @@ thread_new(struct proc *parent, vaddr_t 
>        */
>       timeout_set(&p->p_sleep_to, endtsleep, p);
>  
> -     /*
> -      * set priority of child to be that of parent
> -      * XXX should move p_estcpu into the region of struct proc which gets
> -      * copied.
> -      */
> -     scheduler_fork_hook(parent, p);
> -
>  #ifdef WITNESS
>       p->p_sleeplocks = NULL;
>  #endif
> @@ -328,9 +323,8 @@ fork_thread_start(struct proc *p, struct
>       int s;
>  
>       SCHED_LOCK(s);
> -     p->p_stat = SRUN;
> -     p->p_cpu = sched_choosecpu_fork(parent, flags);
> -     setrunqueue(p);
> +     p->p_cpu = parent->p_cpu;
> +     setrunqueue(NULL, p, p->p_usrpri);
>       SCHED_UNLOCK(s);
>  }
>  
> Index: kern/kern_proc.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_proc.c,v
> retrieving revision 1.85
> diff -u -p -r1.85 kern_proc.c
> --- kern/kern_proc.c  12 Nov 2018 15:09:17 -0000      1.85
> +++ kern/kern_proc.c  1 Jun 2019 16:36:57 -0000
> @@ -475,8 +475,8 @@ proc_printit(struct proc *p, const char 
>       (*pr)("PROC (%s) pid=%d stat=%s\n", p->p_p->ps_comm, p->p_tid, pst);
>       (*pr)("    flags process=%b proc=%b\n",
>           p->p_p->ps_flags, PS_BITS, p->p_flag, P_BITS);
> -     (*pr)("    pri=%u, usrpri=%u, nice=%d\n",
> -         p->p_priority, p->p_usrpri, p->p_p->ps_nice);
> +     (*pr)("    slpprio=%u, usrpri=%u, nice=%d\n",
> +         p->p_slpprio, p->p_usrpri, p->p_p->ps_nice);
>       (*pr)("    forw=%p, list=%p,%p\n",
>           TAILQ_NEXT(p, p_runq), p->p_list.le_next, p->p_list.le_prev);
>       (*pr)("    process=%p user=%p, vmspace=%p\n",
> Index: kern/kern_resource.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_resource.c,v
> retrieving revision 1.63
> diff -u -p -r1.63 kern_resource.c
> --- kern/kern_resource.c      2 Jun 2019 03:58:28 -0000       1.63
> +++ kern/kern_resource.c      2 Jun 2019 18:46:01 -0000
> @@ -180,7 +180,6 @@ donice(struct proc *curp, struct process
>  {
>       struct ucred *ucred = curp->p_ucred;
>       struct proc *p;
> -     int s;
>  
>       if (ucred->cr_uid != 0 && ucred->cr_ruid != 0 &&
>           ucred->cr_uid != chgpr->ps_ucred->cr_uid &&
> @@ -193,11 +192,12 @@ donice(struct proc *curp, struct process
>       n += NZERO;
>       if (n < chgpr->ps_nice && suser(curp))
>               return (EACCES);
> +     TAILQ_FOREACH(p, &chgpr->ps_threads, p_thr_link) {
> +             mtx_enter(&p->p_mtx);
> +             resetpriority(p, p->p_estcpu, n);
> +             mtx_leave(&p->p_mtx);
> +     }
>       chgpr->ps_nice = n;
> -     SCHED_LOCK(s);
> -     TAILQ_FOREACH(p, &chgpr->ps_threads, p_thr_link)
> -             (void)resetpriority(p);
> -     SCHED_UNLOCK(s);
>       return (0);
>  }
>  
> Index: kern/kern_sched.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_sched.c,v
> retrieving revision 1.58
> diff -u -p -r1.58 kern_sched.c
> --- kern/kern_sched.c 1 Jun 2019 14:11:17 -0000       1.58
> +++ kern/kern_sched.c 1 Jun 2019 20:04:12 -0000
> @@ -149,6 +149,7 @@ sched_idle(void *v)
>       cpuset_add(&sched_idle_cpus, ci);
>       p->p_stat = SSLEEP;
>       p->p_cpu = ci;
> +     p->p_usrpri = MAXPRI;
>       atomic_setbits_int(&p->p_flag, P_CPUPEG);
>       mi_switch();
>       cpuset_del(&sched_idle_cpus, ci);
> @@ -244,39 +245,59 @@ sched_init_runqueues(void)
>  }
>  
>  void
> -setrunqueue(struct proc *p)
> +setrunqueue(struct cpu_info *ci, struct proc *p, uint8_t prio)
>  {
>       struct schedstate_percpu *spc;
> -     int queue = p->p_priority >> 2;
> +     int queue = prio >> 2;
> +
> +     if (ci == NULL)
> +             ci = sched_choosecpu(p);
> +
> +     KASSERT(ci != NULL);
> +
> +     p->p_cpu = ci;
> +     p->p_stat = SRUN;
> +     p->p_runprio = prio;
>  
>       SCHED_ASSERT_LOCKED();
> -     spc = &p->p_cpu->ci_schedstate;
> +
> +     spc = &ci->ci_schedstate;
>       spc->spc_nrun++;
>  
>       TAILQ_INSERT_TAIL(&spc->spc_qs[queue], p, p_runq);
>       spc->spc_whichqs |= (1 << queue);
> -     cpuset_add(&sched_queued_cpus, p->p_cpu);
> +     cpuset_add(&sched_queued_cpus, ci);
>  
> -     if (cpuset_isset(&sched_idle_cpus, p->p_cpu))
> -             cpu_unidle(p->p_cpu);
> +     if (cpuset_isset(&sched_idle_cpus, ci))
> +             cpu_unidle(ci);
> +
> +     if (prio < spc->spc_curpriority)
> +             need_resched(ci);
>  }
>  
> -void
> -remrunqueue(struct proc *p)
> +uint8_t
> +remrunqueue(struct cpu_info *ci, struct proc *p)
>  {
> -     struct schedstate_percpu *spc;
> -     int queue = p->p_priority >> 2;
> +     struct schedstate_percpu *spc = &ci->ci_schedstate;
> +     uint8_t prio = p->p_runprio;
> +     int queue = prio >> 2;
>  
>       SCHED_ASSERT_LOCKED();
> -     spc = &p->p_cpu->ci_schedstate;
> +
>       spc->spc_nrun--;
>  
>       TAILQ_REMOVE(&spc->spc_qs[queue], p, p_runq);
>       if (TAILQ_EMPTY(&spc->spc_qs[queue])) {
>               spc->spc_whichqs &= ~(1 << queue);
>               if (spc->spc_whichqs == 0)
> -                     cpuset_del(&sched_queued_cpus, p->p_cpu);
> +                     cpuset_del(&sched_queued_cpus, ci);
>       }
> +
> +     KASSERT(p->p_stat == SRUN);
> +     KASSERT(p->p_cpu == ci);
> +     p->p_runprio = 0;
> +
> +     return (prio);
>  }
>  
>  struct proc *
> @@ -293,10 +314,12 @@ sched_chooseproc(void)
>               if (spc->spc_whichqs) {
>                       for (queue = 0; queue < SCHED_NQS; queue++) {
>                               while ((p = TAILQ_FIRST(&spc->spc_qs[queue]))) {
> -                                     remrunqueue(p);
> -                                     p->p_cpu = sched_choosecpu(p);
> -                                     setrunqueue(p);
> -                                     if (p->p_cpu == curcpu()) {
> +                                     struct cpu_info *ci;
> +                                     uint8_t prio;
> +
> +                                     prio = remrunqueue(p->p_cpu, p);
> +                                     setrunqueue(NULL, p, prio);
> +                                     if (ci == curcpu()) {
>                                               KASSERT(p->p_flag & P_CPUPEG);
>                                               goto again;
>                                       }
> @@ -315,7 +338,7 @@ again:
>       if (spc->spc_whichqs) {
>               queue = ffs(spc->spc_whichqs) - 1;
>               p = TAILQ_FIRST(&spc->spc_qs[queue]);
> -             remrunqueue(p);
> +             remrunqueue(p->p_cpu, p);
>               sched_noidle++;
>               KASSERT(p->p_stat == SRUN);
>       } else if ((p = sched_steal_proc(curcpu())) == NULL) {
> @@ -337,66 +360,10 @@ again:
>                  }
>               KASSERT(p);
>               p->p_stat = SRUN;
> -     } 
> -
> -     KASSERT(p->p_wchan == NULL);
> -     return (p);     
> -}
> -
> -struct cpu_info *
> -sched_choosecpu_fork(struct proc *parent, int flags)
> -{
> -#ifdef MULTIPROCESSOR
> -     struct cpu_info *choice = NULL;
> -     fixpt_t load, best_load = ~0;
> -     int run, best_run = INT_MAX;
> -     struct cpu_info *ci;
> -     struct cpuset set;
> -
> -#if 0
> -     /*
> -      * XXX
> -      * Don't do this until we have a painless way to move the cpu in exec.
> -      * Preferably when nuking the old pmap and getting a new one on a
> -      * new cpu.
> -      */
> -     /*
> -      * PPWAIT forks are simple. We know that the parent will not
> -      * run until we exec and choose another cpu, so we just steal its
> -      * cpu.
> -      */
> -     if (flags & FORK_PPWAIT)
> -             return (parent->p_cpu);
> -#endif
> -
> -     /*
> -      * Look at all cpus that are currently idle and have nothing queued.
> -      * If there are none, pick the one with least queued procs first,
> -      * then the one with lowest load average.
> -      */
> -     cpuset_complement(&set, &sched_queued_cpus, &sched_idle_cpus);
> -     cpuset_intersection(&set, &set, &sched_all_cpus);
> -     if (cpuset_first(&set) == NULL)
> -             cpuset_copy(&set, &sched_all_cpus);
> -
> -     while ((ci = cpuset_first(&set)) != NULL) {
> -             cpuset_del(&set, ci);
> -
> -             load = ci->ci_schedstate.spc_ldavg;
> -             run = ci->ci_schedstate.spc_nrun;
> -
> -             if (choice == NULL || run < best_run ||
> -                 (run == best_run &&load < best_load)) {
> -                     choice = ci;
> -                     best_load = load;
> -                     best_run = run;
> -             }
>       }
>  
> -     return (choice);
> -#else
> -     return (curcpu());
> -#endif
> +     KASSERT(p->p_wchan == NULL);
> +     return (p);
>  }

Glad to see this function will be gone!

>  struct cpu_info *
> @@ -408,6 +375,8 @@ sched_choosecpu(struct proc *p)
>       struct cpu_info *ci;
>       struct cpuset set;
>  
> +     KASSERT(p->p_cpu != NULL);
> +
>       /*
>        * If pegged to a cpu, don't allow it to move.
>        */
> @@ -509,8 +478,7 @@ sched_steal_proc(struct cpu_info *self)
>       if (best == NULL)
>               return (NULL);
>  
> -     spc = &best->p_cpu->ci_schedstate;
> -     remrunqueue(best);
> +     remrunqueue(best->p_cpu, best);
>       best->p_cpu = self;
>  
>       sched_stolen++;
> @@ -566,7 +534,7 @@ sched_proc_to_cpu_cost(struct cpu_info *
>        * and the higher the priority of the proc.
>        */
>       if (!cpuset_isset(&sched_idle_cpus, ci)) {
> -             cost += (p->p_priority - spc->spc_curpriority) *
> +             cost += (p->p_usrpri - spc->spc_curpriority) *
>                   sched_cost_priority;
>               cost += sched_cost_runnable;
>       }
> @@ -610,11 +578,8 @@ sched_peg_curproc(struct cpu_info *ci)
>       int s;
>  
>       SCHED_LOCK(s);
> -     p->p_priority = p->p_usrpri;
> -     p->p_stat = SRUN;
> -     p->p_cpu = ci;
>       atomic_setbits_int(&p->p_flag, P_CPUPEG);
> -     setrunqueue(p);
> +     setrunqueue(ci, p, p->p_usrpri);
>       p->p_ru.ru_nvcsw++;
>       mi_switch();
>       SCHED_UNLOCK(s);
> Index: kern/kern_sig.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_sig.c,v
> retrieving revision 1.230
> diff -u -p -r1.230 kern_sig.c
> --- kern/kern_sig.c   13 May 2019 19:21:31 -0000      1.230
> +++ kern/kern_sig.c   1 Jun 2019 16:26:18 -0000
> @@ -890,6 +890,7 @@ ptsignal(struct proc *p, int signum, enu
>       struct process *pr = p->p_p;
>       struct proc *q;
>       int wakeparent = 0;
> +     uint8_t stpprio;
>  
>       KERNEL_ASSERT_LOCKED();
>  
> @@ -1154,10 +1155,11 @@ runfast:
>       /*
>        * Raise priority to at least PUSER.
>        */
> -     if (p->p_priority > PUSER)
> -             p->p_priority = PUSER;
> +     stpprio = p->p_usrpri;
> +     if (stpprio > PUSER)
> +             stpprio = PUSER;
>  run:
> -     setrunnable(p);
> +     setrunnable(p, stpprio);
>  out:
>       SCHED_UNLOCK(s);
>       if (wakeparent)
> @@ -1909,7 +1911,7 @@ userret(struct proc *p)
>  
>       WITNESS_WARN(WARN_PANIC, NULL, "userret: returning");
>  
> -     p->p_cpu->ci_schedstate.spc_curpriority = p->p_priority = p->p_usrpri;
> +     p->p_cpu->ci_schedstate.spc_curpriority = p->p_usrpri;
>  }
>  
>  int
> @@ -1995,7 +1997,7 @@ single_thread_set(struct proc *p, enum s
>                       if (mode == SINGLE_EXIT) {
>                               SCHED_LOCK(s);
>                               if (q->p_stat == SSTOP) {
> -                                     setrunnable(q);
> +                                     setrunnable(q, q->p_usrpri);
>                                       pr->ps_singlecount++;
>                               }
>                               SCHED_UNLOCK(s);
> @@ -2019,13 +2021,13 @@ single_thread_set(struct proc *p, enum s
>                                       break;
>                               }
>                               /* need to unwind or exit, so wake it */
> -                             setrunnable(q);
> +                             setrunnable(q, q->p_slpprio);
>                       }
>                       pr->ps_singlecount++;
>                       break;
>               case SSTOP:
>                       if (mode == SINGLE_EXIT) {
> -                             setrunnable(q);
> +                             setrunnable(q, q->p_usrpri);
>                               pr->ps_singlecount++;
>                       }
>                       break;
> @@ -2050,7 +2052,7 @@ single_thread_wait(struct process *pr)
>  {
>       /* wait until they're all suspended */
>       while (pr->ps_singlecount > 0)
> -             tsleep(&pr->ps_singlecount, PUSER, "suspend", 0);
> +             tsleep(&pr->ps_singlecount, PWAIT, "suspend", 0);
>  }
>  
>  void
> @@ -2079,7 +2081,7 @@ single_thread_clear(struct proc *p, int 
>               SCHED_LOCK(s);
>               if (q->p_stat == SSTOP && (q->p_flag & flag) == 0) {
>                       if (q->p_wchan == 0)
> -                             setrunnable(q);
> +                             setrunnable(q, q->p_usrpri);
>                       else
>                               q->p_stat = SSLEEP;
>               }
> Index: kern/kern_synch.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_synch.c,v
> retrieving revision 1.148
> diff -u -p -r1.148 kern_synch.c
> --- kern/kern_synch.c 23 Apr 2019 13:35:12 -0000      1.148
> +++ kern/kern_synch.c 1 Jun 2019 17:52:51 -0000
> @@ -280,8 +280,10 @@ sleep_setup(struct sleep_state *sls, con
>  
>       p->p_wchan = ident;
>       p->p_wmesg = wmesg;
> +     mtx_enter(&p->p_mtx);
>       p->p_slptime = 0;
> -     p->p_priority = prio & PRIMASK;
> +     mtx_leave(&p->p_mtx);
> +     p->p_slpprio = prio & PRIMASK;
>       TAILQ_INSERT_TAIL(&slpque[LOOKUP(ident)], p, p_runq);
>  }
>  
> @@ -304,7 +306,6 @@ sleep_finish(struct sleep_state *sls, in
>               panic("sleep_finish !SONPROC");
>  #endif
>  
> -     p->p_cpu->ci_schedstate.spc_curpriority = p->p_usrpri;
>       SCHED_UNLOCK(sls->sls_s);
>  
>       /*
> @@ -399,7 +400,7 @@ endtsleep(void *arg)
>       SCHED_LOCK(s);
>       if (p->p_wchan) {
>               if (p->p_stat == SSLEEP)
> -                     setrunnable(p);
> +                     setrunnable(p, p->p_slpprio);
>               else
>                       unsleep(p);
>               atomic_setbits_int(&p->p_flag, P_TIMEOUT);
> @@ -454,7 +455,7 @@ wakeup_n(const volatile void *ident, int
>                       p->p_wchan = 0;
>                       TAILQ_REMOVE(qp, p, p_runq);
>                       if (p->p_stat == SSLEEP)
> -                             setrunnable(p);
> +                             setrunnable(p, p->p_slpprio);
>               }
>       }
>       SCHED_UNLOCK(s);
> @@ -473,6 +474,7 @@ int
>  sys_sched_yield(struct proc *p, void *v, register_t *retval)
>  {
>       struct proc *q;
> +     uint8_t newprio;
>       int s;
>  
>       SCHED_LOCK(s);
> @@ -481,11 +483,10 @@ sys_sched_yield(struct proc *p, void *v,
>        * sched_yield(2), drop its priority to ensure its siblings
>        * can make some progress.
>        */
> -     p->p_priority = p->p_usrpri;
> +     newprio = p->p_usrpri;
>       TAILQ_FOREACH(q, &p->p_p->ps_threads, p_thr_link)
> -             p->p_priority = max(p->p_priority, q->p_priority);
> -     p->p_stat = SRUN;
> -     setrunqueue(p);
> +             newprio = max(newprio, q->p_runprio);
> +     setrunqueue(p->p_cpu, p, newprio);
>       p->p_ru.ru_nvcsw++;
>       mi_switch();
>       SCHED_UNLOCK(s);
> @@ -571,7 +572,7 @@ thrsleep(struct proc *p, struct sys___th
>               void *sleepaddr = &p->p_thrslpid;
>               if (ident == -1)
>                       sleepaddr = &globalsleepaddr;
> -             error = tsleep(sleepaddr, PUSER | PCATCH, "thrsleep",
> +             error = tsleep(sleepaddr, PWAIT | PCATCH, "thrsleep",
>                   (int)to_ticks);
>       }
>  
> Index: kern/sched_bsd.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/sched_bsd.c,v
> retrieving revision 1.53
> diff -u -p -r1.53 sched_bsd.c
> --- kern/sched_bsd.c  1 Jun 2019 14:11:17 -0000       1.53
> +++ kern/sched_bsd.c  1 Jun 2019 21:26:33 -0000
> @@ -61,8 +61,8 @@ int rrticks_init;           /* # of hardclock tic
>  struct __mp_lock sched_lock;
>  #endif
>  
> -void  schedcpu(void *);
> -void  updatepri(struct proc *);
> +void         schedcpu(void *);
> +uint32_t     decay_aftersleep(struct proc *, uint32_t, uint32_t);
>  
>  void
>  scheduler_start(void)
> @@ -206,6 +206,7 @@ schedcpu(void *arg)
>       struct proc *p;
>       int s;
>       unsigned int newcpu;
> +     uint8_t newprio;
>       int phz;
>  
>       /*
> @@ -228,6 +229,7 @@ schedcpu(void *arg)
>               /*
>                * Increment sleep time (if sleeping). We ignore overflow.
>                */
> +             mtx_enter(&p->p_mtx);
>               if (p->p_stat == SSLEEP || p->p_stat == SSTOP)
>                       p->p_slptime++;


I think the above incrementing of p_slptime is incorrect! It should be more
precise: only increment when the thread was definitely in a sleep or stop
state for the entire second. Maybe something like this below?

if ((p->p_stat == SSLEEP || p->p_stat == SSTOP) && p->p_cpticks == 0)
      p->p_slptime++;
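
(This leans on p_cpticks, which schedcpu() zeroes on every pass it doesn't
skip, so 0 here would mean the thread got no CPU during the last second. Just
an idea; the interaction with the p_slptime > 1 early-continue would need a
double check.)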

>               p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
> @@ -235,9 +237,10 @@ schedcpu(void *arg)
>                * If the process has slept the entire second,

New comment: If the proc has slept the entire second,

>                * stop recalculating its priority until it wakes up.
>                */
> -             if (p->p_slptime > 1)
> +             if (p->p_slptime > 1) {
> +                     mtx_leave(&p->p_mtx);
>                       continue;
> -             SCHED_LOCK(s);
> +             }
>               /*
>                * p_pctcpu is only for diagnostic tools such as ps.
>                */
> @@ -252,19 +255,26 @@ schedcpu(void *arg)
>  #endif
>               p->p_cpticks = 0;
>               newcpu = (u_int) decay_cpu(loadfac, p->p_estcpu);
> -             p->p_estcpu = newcpu;
> -             resetpriority(p);
> -             if (p->p_priority >= PUSER) {
> -                     if (p->p_stat == SRUN &&
> -                         (p->p_priority / SCHED_PPQ) !=
> -                         (p->p_usrpri / SCHED_PPQ)) {
> -                             remrunqueue(p);
> -                             p->p_priority = p->p_usrpri;
> -                             setrunqueue(p);
> -                     } else
> -                             p->p_priority = p->p_usrpri;
> +             newprio = resetpriority(p, newcpu, p->p_p->ps_nice);
> +             mtx_leave(&p->p_mtx);
> +
> +             if (p->p_stat == SRUN) {
> +                     SCHED_LOCK(s);
> +                     if (p->p_stat == SRUN) {

This second check for SRUN looks unnecessary; it was already checked just two
lines above.

> +                             struct schedstate_percpu *spc;
> +                             uint8_t runprio;
> +
> +                             spc = &p->p_cpu->ci_schedstate;
> +                             runprio = p->p_runprio;
> +                             if ((runprio >= PUSER) &&
> +                                 (SRUNQ(runprio) != SRUNQ(newprio))) {
> +                                     remrunqueue(p->p_cpu, p);
> +                                     setrunqueue(p->p_cpu, p, newprio);
> +                             } else if (newprio < spc->spc_curpriority)
> +                                     need_resched(p->p_cpu);
> +                     }
> +                     SCHED_UNLOCK(s);
>               }
> -             SCHED_UNLOCK(s);
>       }
>       uvm_meter();
>       wakeup(&lbolt);
> @@ -276,23 +286,23 @@ schedcpu(void *arg)
>   * For all load averages >= 1 and max p_estcpu of 255, sleeping for at
>   * least six times the loadfactor will decay p_estcpu to zero.
>   */
> -void
> -updatepri(struct proc *p)
> +uint32_t
> +decay_aftersleep(struct proc *p, uint32_t estcpu, uint32_t slptime)
>  {
> -     unsigned int newcpu = p->p_estcpu;
>       fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
> +     uint32_t newcpu;
>  
> -     SCHED_ASSERT_LOCKED();
> -
> -     if (p->p_slptime > 5 * loadfac)
> -             p->p_estcpu = 0;
> +     if (slptime > 5 * loadfac)
> +             newcpu = 0;
>       else {
> -             p->p_slptime--; /* the first time was done in schedcpu */
> -             while (newcpu && --p->p_slptime)
> -                     newcpu = (int) decay_cpu(loadfac, newcpu);
> -             p->p_estcpu = newcpu;
> +             newcpu = estcpu;
> +             slptime--;      /* the first time was done in schedcpu */
> +             while (newcpu && --slptime)
> +                     newcpu = decay_cpu(loadfac, newcpu);
> +
>       }
> -     resetpriority(p);
> +
> +     return (newcpu);
>  }
>  
>  /*
> @@ -308,9 +318,7 @@ yield(void)
>       NET_ASSERT_UNLOCKED();
>  
>       SCHED_LOCK(s);
> -     p->p_priority = p->p_usrpri;
> -     p->p_stat = SRUN;
> -     setrunqueue(p);
> +     setrunqueue(p->p_cpu, p, p->p_usrpri);
>       p->p_ru.ru_nvcsw++;
>       mi_switch();
>       SCHED_UNLOCK(s);
> @@ -329,9 +337,7 @@ preempt(void)
>       int s;
>  
>       SCHED_LOCK(s);
> -     p->p_priority = p->p_usrpri;
> -     p->p_stat = SRUN;
> -     setrunqueue(p);
> +     setrunqueue(p->p_cpu, p, p->p_usrpri);
>       p->p_ru.ru_nivcsw++;
>       mi_switch();
>       SCHED_UNLOCK(s);
> @@ -427,7 +433,9 @@ mi_switch(void)
>        */
>       KASSERT(p->p_cpu == curcpu());
>  
> -     nanouptime(&p->p_cpu->ci_schedstate.spc_runtime);
> +     spc = &curcpu()->ci_schedstate;
> +     spc->spc_curpriority = p->p_usrpri;
> +     nanouptime(&spc->spc_runtime);
>  
>  #ifdef MULTIPROCESSOR
>       /*
> @@ -441,36 +449,13 @@ mi_switch(void)
>  #endif
>  }
>  
> -static __inline void
> -resched_proc(struct proc *p, u_char pri)
> -{
> -     struct cpu_info *ci;
> -
> -     /*
> -      * XXXSMP
> -      * This does not handle the case where its last
> -      * CPU is running a higher-priority process, but every
> -      * other CPU is running a lower-priority process.  There
> -      * are ways to handle this situation, but they're not
> -      * currently very pretty, and we also need to weigh the
> -      * cost of moving a process from one CPU to another.
> -      *
> -      * XXXSMP
> -      * There is also the issue of locking the other CPU's
> -      * sched state, which we currently do not do.
> -      */
> -     ci = (p->p_cpu != NULL) ? p->p_cpu : curcpu();
> -     if (pri < ci->ci_schedstate.spc_curpriority)
> -             need_resched(ci);
> -}
> -
>  /*
>   * Change process state to be runnable,

New comment: Change proc state to be runnable,

>   * placing it on the run queue if it is in memory,
>   * and awakening the swapper if it isn't in memory.
>   */
>  void
> -setrunnable(struct proc *p)
> +setrunnable(struct proc *p, uint8_t slpprio)
>  {
>       SCHED_ASSERT_LOCKED();
>  
> @@ -493,13 +478,18 @@ setrunnable(struct proc *p)
>               unsleep(p);             /* e.g. when sending signals */
>               break;
>       }
> -     p->p_stat = SRUN;
> -     p->p_cpu = sched_choosecpu(p);
> -     setrunqueue(p);
> -     if (p->p_slptime > 1)
> -             updatepri(p);
> +     /* Put the process on any runqueue using its sleeping priority. */
> +     setrunqueue(NULL, p, slpprio);
> +
> +     mtx_enter(&p->p_mtx);
> +     if (p->p_slptime > 1) {
> +             uint32_t newcpu;
> +
> +             newcpu = decay_aftersleep(p, p->p_estcpu, p->p_slptime);
> +             resetpriority(p, newcpu, p->p_p->ps_nice);
> +     }
>       p->p_slptime = 0;
> -     resched_proc(p, p->p_priority);
> +     mtx_leave(&p->p_mtx);
>  }
>  
>  /*
> @@ -507,18 +497,18 @@ setrunnable(struct proc *p)
>   * Arrange to reschedule if the resulting priority is better
>   * than that of the current process.
>   */

New comment: than that of the current proc.

> -void
> -resetpriority(struct proc *p)
> +uint8_t
> +resetpriority(struct proc *p, uint32_t newcpu, uint8_t nice)
>  {
> -     unsigned int newpriority;
> +     unsigned int newprio;
>  
> -     SCHED_ASSERT_LOCKED();
> +     newprio = min((PUSER + newcpu + NICE_WEIGHT * (nice - NZERO)), MAXPRI);
> +
> +     MUTEX_ASSERT_LOCKED(&p->p_mtx);
> +     p->p_estcpu = newcpu;
> +     p->p_usrpri = newprio;
>  
> -     newpriority = PUSER + p->p_estcpu +
> -         NICE_WEIGHT * (p->p_p->ps_nice - NZERO);
> -     newpriority = min(newpriority, MAXPRI);
> -     p->p_usrpri = newpriority;
> -     resched_proc(p, p->p_usrpri);
> +     return (newprio);
>  }
>  
>  /*
> @@ -540,17 +530,17 @@ schedclock(struct proc *p)
>  {
>       struct cpu_info *ci = curcpu();
>       struct schedstate_percpu *spc = &ci->ci_schedstate;
> -     int s;
> +     uint32_t newcpu;
>  
>       if (p == spc->spc_idleproc || spc->spc_spinning)
>               return;
>  
> -     SCHED_LOCK(s);
> -     p->p_estcpu = ESTCPULIM(p->p_estcpu + 1);
> -     resetpriority(p);
> -     if (p->p_priority >= PUSER)
> -             p->p_priority = p->p_usrpri;
> -     SCHED_UNLOCK(s);
> +     /* Only decay the priority if nobody is messing with it. */
> +     if (!mtx_enter_try(&p->p_mtx))
> +             return;
> +     newcpu = ESTCPULIM(p->p_estcpu + 1);
> +     resetpriority(p, newcpu, p->p_p->ps_nice);
> +     mtx_leave(&p->p_mtx);
>  }
>  
>  void (*cpu_setperf)(int);
> Index: kern/sys_futex.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/sys_futex.c,v
> retrieving revision 1.12
> diff -u -p -r1.12 sys_futex.c
> --- kern/sys_futex.c  6 Feb 2019 15:11:20 -0000       1.12
> +++ kern/sys_futex.c  1 Jun 2019 16:04:57 -0000
> @@ -254,7 +254,7 @@ futex_wait(uint32_t *uaddr, uint32_t val
>       TAILQ_INSERT_TAIL(&f->ft_threads, p, p_fut_link);
>       p->p_futex = f;
>  
> -     error = rwsleep(p, &ftlock, PUSER|PCATCH, "fsleep", (int)to_ticks);
> +     error = rwsleep(p, &ftlock, PWAIT | PCATCH, "fsleep", (int)to_ticks);
>       if (error == ERESTART)
>               error = ECANCELED;
>       else if (error == EWOULDBLOCK) {
> Index: kern/sys_generic.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/sys_generic.c,v
> retrieving revision 1.123
> diff -u -p -r1.123 sys_generic.c
> --- kern/sys_generic.c        21 Jan 2019 23:41:26 -0000      1.123
> +++ kern/sys_generic.c        1 Jun 2019 15:59:16 -0000
> @@ -806,7 +806,7 @@ selwakeup(struct selinfo *sip)
>               SCHED_LOCK(s);
>               if (p->p_wchan == (caddr_t)&selwait) {
>                       if (p->p_stat == SSLEEP)
> -                             setrunnable(p);
> +                             setrunnable(p, p->p_slpprio);
>                       else
>                               unsleep(p);
>               } else if (p->p_flag & P_SELECT)
> Index: kern/sys_process.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/sys_process.c,v
> retrieving revision 1.80
> diff -u -p -r1.80 sys_process.c
> --- kern/sys_process.c        19 Feb 2018 09:25:13 -0000      1.80
> +++ kern/sys_process.c        1 Jun 2019 16:18:41 -0000
> @@ -493,7 +493,7 @@ ptrace_ctrl(struct proc *p, int req, pid
>               if (t->p_stat == SSTOP) {
>                       t->p_xstat = data;
>                       SCHED_LOCK(s);
> -                     setrunnable(t);
> +                     setrunnable(t, t->p_usrpri);
>                       SCHED_UNLOCK(s);
>               } else {
>                       if (data != 0)
> Index: kern/vfs_sync.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/vfs_sync.c,v
> retrieving revision 1.60
> diff -u -p -r1.60 vfs_sync.c
> --- kern/vfs_sync.c   13 Aug 2018 15:26:17 -0000      1.60
> +++ kern/vfs_sync.c   1 Jun 2019 16:20:49 -0000
> @@ -245,7 +245,7 @@ speedup_syncer(void)
>  
>       SCHED_LOCK(s);
>       if (syncerproc && syncerproc->p_wchan == &lbolt)
> -             setrunnable(syncerproc);
> +             setrunnable(syncerproc, syncerproc->p_usrpri);
>       SCHED_UNLOCK(s);
>       if (rushjob < syncdelay / 2) {
>               rushjob += 1;
> Index: sys/proc.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/proc.h,v
> retrieving revision 1.268
> diff -u -p -r1.268 proc.h
> --- sys/proc.h        1 Jun 2019 22:42:18 -0000       1.268
> +++ sys/proc.h        2 Jun 2019 18:50:17 -0000
> @@ -307,6 +307,7 @@ struct p_inentry {
>  /*
>   *  Locks used to protect struct members in this file:
>   *   s       scheduler lock
> + *   m       `p_mtx'
>   */
>  struct proc {
>       TAILQ_ENTRY(proc) p_runq;       /* [s] current run/sleep queue */
> @@ -317,6 +318,7 @@ struct proc {
>  
>       TAILQ_ENTRY(proc) p_fut_link;   /* Threads in a futex linkage. */
>       struct  futex   *p_futex;       /* Current sleeping futex. */
> +     struct  mutex   p_mtx;
>  
>       /* substructures: */
>       struct  filedesc *p_fd;         /* copy of p_p->ps_fd */
> @@ -328,7 +330,7 @@ struct proc {
>       int     p_flag;                 /* P_* flags. */
>       u_char  p_spare;                /* unused */

Can p_spare be removed? The compiler will pad the struct for alignment anyway.
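
A quick userspace illustration (a toy layout, not the real struct proc):

#include <stdio.h>

/* A lone spare byte next to other byte-sized members usually hides
 * inside padding the compiler inserts anyway, so removing it does not
 * necessarily change the struct size. */
struct with_spare {
	int		flag;
	unsigned char	spare;
	char		stat;
	unsigned char	runprio;
	unsigned char	descfd;
	int		tid;
};

struct without_spare {
	int		flag;
	char		stat;
	unsigned char	runprio;
	unsigned char	descfd;
	int		tid;
};

int
main(void)
{
	/* prints "with: 12, without: 12" on common ABIs */
	printf("with: %zu, without: %zu\n",
	    sizeof(struct with_spare), sizeof(struct without_spare));
	return (0);
}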

>       char    p_stat;                 /* [s] S* process status. */
> -     char    p_pad1[1];
> +     uint8_t p_runprio;              /* [s] priority in SRUN. */
>       u_char  p_descfd;               /* if not 255, fdesc permits this fd */
>  
>       pid_t   p_tid;                  /* Thread identifier. */
> @@ -341,13 +343,12 @@ struct proc {
>       long    p_thrslpid;     /* for thrsleep syscall */
>  
>       /* scheduling */
> -     u_int   p_estcpu;               /* [s] Time averaged val of p_cpticks */
>       int     p_cpticks;       /* Ticks of cpu time. */
>       const volatile void *p_wchan;   /* [s] Sleep address. */
>       struct  timeout p_sleep_to;/* timeout for tsleep() */
>       const char *p_wmesg;            /* [s] Reason for sleep. */
> -     fixpt_t p_pctcpu;               /* [s] %cpu for this thread */
> -     u_int   p_slptime;              /* [s] Time since last blocked. */
> +     fixpt_t p_pctcpu;               /* [m] %cpu for this thread */
> +     u_int   p_slptime;              /* [m] Time since last blocked. */
>       u_int   p_uticks;               /* Statclock hits in user mode. */
>       u_int   p_sticks;               /* Statclock hits in system mode. */
>       u_int   p_iticks;               /* Statclock hits processing intr. */
> @@ -366,8 +367,13 @@ struct proc {
>  #define      p_startcopy     p_sigmask
>       sigset_t p_sigmask;     /* Current signal mask. */
>  
> -     u_char  p_priority;     /* [s] Process priority. */
> -     u_char  p_usrpri;       /* [s] User-prio based on p_estcpu & ps_nice. */
> +     u_int    p_spserial;
> +     vaddr_t  p_spstart;
> +     vaddr_t  p_spend;
> +
> +     u_char  p_slpprio;      /* [s] Sleeping priority. */
> +     u_char  p_usrpri;       /* [m] Priority based on p_estcpu & ps_nice. */
> +     u_int   p_estcpu;               /* [m] Time averaged val of p_cpticks */
>       int     p_pledge_syscall;       /* Cache of current syscall */
>  
>       struct  ucred *p_ucred;         /* cached credentials */
> @@ -550,8 +556,8 @@ void      leavepgrp(struct process *);
>  void killjobc(struct process *);
>  void preempt(void);
>  void procinit(void);
> -void resetpriority(struct proc *);
> -void setrunnable(struct proc *);
> +uint8_t      resetpriority(struct proc *, uint32_t, uint8_t);
> +void setrunnable(struct proc *, uint8_t);
>  void endtsleep(void *);
>  void unsleep(struct proc *);
>  void reaper(void *);
> Index: sys/sched.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/sched.h,v
> retrieving revision 1.52
> diff -u -p -r1.52 sched.h
> --- sys/sched.h       16 May 2019 13:52:47 -0000      1.52
> +++ sys/sched.h       1 Jun 2019 21:14:35 -0000
> @@ -137,6 +137,7 @@ struct cpustats {
>  #define SPCF_SHOULDHALT              0x0004  /* CPU should be vacated */
>  #define SPCF_HALTED          0x0008  /* CPU has been halted */
>  
> +#define SRUNQ(prio)  ((prio) / SCHED_PPQ)
> #define      SCHED_PPQ       (128 / SCHED_NQS)       /* priorities per queue */
>  #define NICE_WEIGHT 2                        /* priorities per nice level */
>  #define      ESTCPULIM(e) min((e), NICE_WEIGHT * PRIO_MAX - SCHED_PPQ)
> @@ -179,13 +180,8 @@ void sched_stop_secondary_cpus(void);
>  int  cpu_is_online(struct cpu_info *);
>  
>  void sched_init_runqueues(void);
> -void setrunqueue(struct proc *);
> -void remrunqueue(struct proc *);
> -
> -/* Inherit the parent's scheduler history */
> -#define scheduler_fork_hook(parent, child) do {                             \
> -     (child)->p_estcpu = (parent)->p_estcpu;                         \
> -} while (0)
> +void setrunqueue(struct cpu_info *, struct proc *, uint8_t);
> +uint8_t remrunqueue(struct cpu_info *, struct proc *);
>  
>  /* Chargeback parents for the sins of their children.  */
> #define scheduler_wait_hook(parent, child) do {                             \
> Index: sys/sysctl.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/sysctl.h,v
> retrieving revision 1.188
> diff -u -p -r1.188 sysctl.h
> --- sys/sysctl.h      1 Jun 2019 14:11:18 -0000       1.188
> +++ sys/sysctl.h      1 Jun 2019 16:36:13 -0000
> @@ -629,7 +629,7 @@ do {                                                    \
>               (kp)->p_stat = (p)->p_stat;                             \
>               (kp)->p_slptime = (p)->p_slptime;                       \
>               (kp)->p_holdcnt = 1;                                    \
> -             (kp)->p_priority = (p)->p_priority;                     \
> +             (kp)->p_priority = (p)->p_usrpri + PZERO;               \
>               (kp)->p_usrpri = (p)->p_usrpri;                         \
>               if ((p)->p_wchan && (p)->p_wmesg)                       \
>                       copy_str((kp)->p_wmesg, (p)->p_wmesg,           \
> 
