On 12/05/19(Sun) 18:17, Martin Pieuchot wrote: > People started complaining that the SCHED_LOCK() is contended. Here's a > first round at reducing its scope. > > Diff below introduces a per-process mutex to protect time accounting > fields accessed in tuagg(). tuagg() is principally called in mi_switch() > where the SCHED_LOCK() is currently held. Moving these fields out of > its scope allows us to drop some SCHED_LOCK/UNLOCK dances in accounting > path. > > Note that hardclock(9) still increments p_{u,s,i}ticks without holding a > lock. I doubt it's worth doing anything so this diff doesn't change > anything in that regard.
Updated diff: - Use struct assignment instead of timespecadd() to initialize `tu_runtime', pointed out by visa@. - Do not use mtx_enter(9)/leave(9) in libkvm's version of FILL_PROC(), pointed out by guenther@ and lteo@. Oks? Index: kern/init_main.c =================================================================== RCS file: /cvs/src/sys/kern/init_main.c,v retrieving revision 1.284 diff -u -p -r1.284 init_main.c --- kern/init_main.c 26 Feb 2019 14:24:21 -0000 1.284 +++ kern/init_main.c 25 May 2019 18:53:51 -0000 @@ -518,7 +518,9 @@ main(void *framep) getnanotime(&pr->ps_start); TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link) { nanouptime(&p->p_cpu->ci_schedstate.spc_runtime); + mtx_enter(&pr->ps_mtx); timespecclear(&p->p_rtime); + mtx_leave(&pr->ps_mtx); } } Index: kern/kern_acct.c =================================================================== RCS file: /cvs/src/sys/kern/kern_acct.c,v retrieving revision 1.36 diff -u -p -r1.36 kern_acct.c --- kern/kern_acct.c 28 Apr 2018 03:13:04 -0000 1.36 +++ kern/kern_acct.c 25 May 2019 18:53:51 -0000 @@ -179,7 +179,9 @@ acct_process(struct proc *p) memcpy(acct.ac_comm, pr->ps_comm, sizeof acct.ac_comm); /* (2) The amount of user and system time that was used */ + mtx_enter(&pr->ps_mtx); calctsru(&pr->ps_tu, &ut, &st, NULL); + mtx_leave(&pr->ps_mtx); acct.ac_utime = encode_comp_t(ut.tv_sec, ut.tv_nsec); acct.ac_stime = encode_comp_t(st.tv_sec, st.tv_nsec); Index: kern/kern_exec.c =================================================================== RCS file: /cvs/src/sys/kern/kern_exec.c,v retrieving revision 1.203 diff -u -p -r1.203 kern_exec.c --- kern/kern_exec.c 8 Feb 2019 12:51:57 -0000 1.203 +++ kern/kern_exec.c 25 May 2019 18:53:51 -0000 @@ -661,8 +661,10 @@ sys_execve(struct proc *p, void *v, regi } /* reset CPU time usage for the thread, but not the process */ + mtx_enter(&pr->ps_mtx); timespecclear(&p->p_tu.tu_runtime); p->p_tu.tu_uticks = p->p_tu.tu_sticks = p->p_tu.tu_iticks = 0; + mtx_leave(&pr->ps_mtx); km_free(argp, NCARGS, &kv_exec, &kp_pageable); Index: kern/kern_exit.c =================================================================== RCS file: /cvs/src/sys/kern/kern_exit.c,v retrieving revision 1.174 diff -u -p -r1.174 kern_exit.c --- kern/kern_exit.c 13 May 2019 19:21:31 -0000 1.174 +++ kern/kern_exit.c 25 May 2019 18:53:52 -0000 @@ -284,7 +284,7 @@ exit1(struct proc *p, int rv, int flags) /* add thread's accumulated rusage into the process's total */ ruadd(rup, &p->p_ru); - tuagg(pr, p); + tuagg(p, NULL); /* * clear %cpu usage during swap @@ -296,7 +296,9 @@ exit1(struct proc *p, int rv, int flags) * Final thread has died, so add on our children's rusage * and calculate the total times */ + mtx_enter(&pr->ps_mtx); calcru(&pr->ps_tu, &rup->ru_utime, &rup->ru_stime, NULL); + mtx_leave(&pr->ps_mtx); ruadd(rup, &pr->ps_cru); /* notify interested parties of our demise and clean up */ Index: kern/kern_fork.c =================================================================== RCS file: /cvs/src/sys/kern/kern_fork.c,v retrieving revision 1.209 diff -u -p -r1.209 kern_fork.c --- kern/kern_fork.c 6 Jan 2019 12:59:45 -0000 1.209 +++ kern/kern_fork.c 25 May 2019 18:53:52 -0000 @@ -55,6 +55,7 @@ #include <sys/sysctl.h> #include <sys/pool.h> #include <sys/mman.h> +#include <sys/mutex.h> #include <sys/ptrace.h> #include <sys/atomic.h> #include <sys/pledge.h> @@ -209,6 +210,8 @@ process_initialize(struct process *pr, s LIST_INIT(&pr->ps_ftlist); LIST_INIT(&pr->ps_kqlist); LIST_INIT(&pr->ps_sigiolst); + + mtx_init(&pr->ps_mtx, IPL_MPFLOOR); timeout_set(&pr->ps_realit_to, realitexpire, pr); timeout_set(&pr->ps_rucheck_to, rucheck, pr); Index: kern/kern_resource.c =================================================================== RCS file: /cvs/src/sys/kern/kern_resource.c,v retrieving revision 1.59 diff -u -p -r1.59 kern_resource.c --- kern/kern_resource.c 6 Jan 2019 12:59:45 -0000 1.59 +++ kern/kern_resource.c 25 May 2019 18:53:52 -0000 @@ -47,13 +47,13 @@ #include <sys/ktrace.h> #include <sys/sched.h> #include <sys/signalvar.h> - +#include <sys/mutex.h> #include <sys/mount.h> #include <sys/syscallargs.h> #include <uvm/uvm_extern.h> -void tuagg_sub(struct tusage *, struct proc *); +void tuagg_sub(struct tusage *, struct tusage *); /* * Patchable maximum data and stack limits. @@ -338,12 +338,12 @@ sys_getrlimit(struct proc *p, void *v, r } void -tuagg_sub(struct tusage *tup, struct proc *p) +tuagg_sub(struct tusage *ttup, struct tusage *ftup) { - timespecadd(&tup->tu_runtime, &p->p_rtime, &tup->tu_runtime); - tup->tu_uticks += p->p_uticks; - tup->tu_sticks += p->p_sticks; - tup->tu_iticks += p->p_iticks; + timespecadd(&ttup->tu_runtime, &ftup->tu_runtime, &ttup->tu_runtime); + ttup->tu_uticks += ftup->tu_uticks; + ttup->tu_sticks += ftup->tu_sticks; + ttup->tu_iticks += ftup->tu_iticks; } /* @@ -351,24 +351,26 @@ tuagg_sub(struct tusage *tup, struct pro * totals for the thread and process */ void -tuagg_unlocked(struct process *pr, struct proc *p) +tuagg(struct proc *p, struct timespec *tsp) { - tuagg_sub(&pr->ps_tu, p); - tuagg_sub(&p->p_tu, p); - timespecclear(&p->p_rtime); + struct process *pr = p->p_p; + struct tusage tu; + + mtx_enter(&pr->ps_mtx); + tu.tu_uticks = p->p_uticks; + tu.tu_sticks = p->p_sticks; + tu.tu_iticks = p->p_iticks; p->p_uticks = 0; p->p_sticks = 0; p->p_iticks = 0; -} - -void -tuagg(struct process *pr, struct proc *p) -{ - int s; + if (tsp != NULL) + timespecadd(&p->p_rtime, tsp, &p->p_rtime); + tu.tu_runtime = p->p_rtime; + timespecclear(&p->p_rtime); + tuagg_sub(&pr->ps_tu, &tu); + tuagg_sub(&p->p_tu, &tu); + mtx_leave(&pr->ps_mtx); - SCHED_LOCK(s); - tuagg_unlocked(pr, p); - SCHED_UNLOCK(s); } /* @@ -461,15 +463,19 @@ dogetrusage(struct proc *p, int who, str /* add on all living threads */ TAILQ_FOREACH(q, &pr->ps_threads, p_thr_link) { ruadd(rup, &q->p_ru); - tuagg(pr, q); + tuagg(q, NULL); } + mtx_enter(&pr->ps_mtx); calcru(&pr->ps_tu, &rup->ru_utime, &rup->ru_stime, NULL); + mtx_leave(&pr->ps_mtx); break; case RUSAGE_THREAD: *rup = p->p_ru; + mtx_enter(&pr->ps_mtx); calcru(&p->p_tu, &rup->ru_utime, &rup->ru_stime, NULL); + mtx_leave(&pr->ps_mtx); break; case RUSAGE_CHILDREN: @@ -507,13 +513,12 @@ rucheck(void *arg) struct process *pr = arg; struct rlimit *rlim; rlim_t runtime; - int s; KERNEL_ASSERT_LOCKED(); - SCHED_LOCK(s); + mtx_enter(&pr->ps_mtx); runtime = pr->ps_tu.tu_runtime.tv_sec; - SCHED_UNLOCK(s); + mtx_leave(&pr->ps_mtx); rlim = &pr->ps_limit->pl_rlimit[RLIMIT_CPU]; if (runtime >= rlim->rlim_cur) { Index: kern/kern_sched.c =================================================================== RCS file: /cvs/src/sys/kern/kern_sched.c,v retrieving revision 1.56 diff -u -p -r1.56 kern_sched.c --- kern/kern_sched.c 26 Mar 2019 04:24:22 -0000 1.56 +++ kern/kern_sched.c 25 May 2019 18:53:52 -0000 @@ -212,13 +212,17 @@ void sched_exit(struct proc *p) { struct schedstate_percpu *spc = &curcpu()->ci_schedstate; + struct process *pr = p->p_p; struct timespec ts; struct proc *idle; int s; nanouptime(&ts); timespecsub(&ts, &spc->spc_runtime, &ts); + + mtx_enter(&pr->ps_mtx); timespecadd(&p->p_rtime, &ts, &p->p_rtime); + mtx_leave(&pr->ps_mtx); LIST_INSERT_HEAD(&spc->spc_deadproc, p, p_hash); Index: kern/kern_sysctl.c =================================================================== RCS file: /cvs/src/sys/kern/kern_sysctl.c,v retrieving revision 1.355 diff -u -p -r1.355 kern_sysctl.c --- kern/kern_sysctl.c 9 May 2019 14:59:30 -0000 1.355 +++ kern/kern_sysctl.c 25 May 2019 18:53:52 -0000 @@ -1641,7 +1641,9 @@ fill_kproc(struct process *pr, struct ki if ((pr->ps_flags & PS_ZOMBIE) == 0) { if ((pr->ps_flags & PS_EMBRYO) == 0 && vm != NULL) ki->p_vm_rssize = vm_resident_count(vm); + mtx_enter(&pr->ps_mtx); calctsru(isthread ? &p->p_tu : &pr->ps_tu, &ut, &st, NULL); + mtx_leave(&pr->ps_mtx); ki->p_uutime_sec = ut.tv_sec; ki->p_uutime_usec = ut.tv_nsec/1000; ki->p_ustime_sec = st.tv_sec; Index: kern/kern_time.c =================================================================== RCS file: /cvs/src/sys/kern/kern_time.c,v retrieving revision 1.116 diff -u -p -r1.116 kern_time.c --- kern/kern_time.c 21 May 2019 08:30:35 -0000 1.116 +++ kern/kern_time.c 25 May 2019 18:53:52 -0000 @@ -106,6 +106,7 @@ settime(const struct timespec *ts) int clock_gettime(struct proc *p, clockid_t clock_id, struct timespec *tp) { + struct process *pr = p->p_p; struct bintime bt; struct proc *q; int error = 0; @@ -126,14 +127,18 @@ clock_gettime(struct proc *p, clockid_t case CLOCK_PROCESS_CPUTIME_ID: nanouptime(tp); timespecsub(tp, &curcpu()->ci_schedstate.spc_runtime, tp); - timespecadd(tp, &p->p_p->ps_tu.tu_runtime, tp); + mtx_enter(&pr->ps_mtx); + timespecadd(tp, &pr->ps_tu.tu_runtime, tp); timespecadd(tp, &p->p_rtime, tp); + mtx_leave(&pr->ps_mtx); break; case CLOCK_THREAD_CPUTIME_ID: nanouptime(tp); timespecsub(tp, &curcpu()->ci_schedstate.spc_runtime, tp); + mtx_enter(&pr->ps_mtx); timespecadd(tp, &p->p_tu.tu_runtime, tp); timespecadd(tp, &p->p_rtime, tp); + mtx_leave(&pr->ps_mtx); break; default: /* check for clock from pthread_getcpuclockid() */ @@ -142,8 +147,11 @@ clock_gettime(struct proc *p, clockid_t q = tfind(__CLOCK_PTID(clock_id) - THREAD_PID_OFFSET); if (q == NULL || q->p_p != p->p_p) error = ESRCH; - else + else { + mtx_enter(&pr->ps_mtx); *tp = q->p_tu.tu_runtime; + mtx_leave(&pr->ps_mtx); + } KERNEL_UNLOCK(); } else error = EINVAL; Index: kern/sched_bsd.c =================================================================== RCS file: /cvs/src/sys/kern/sched_bsd.c,v retrieving revision 1.51 diff -u -p -r1.51 sched_bsd.c --- kern/sched_bsd.c 25 May 2019 18:11:10 -0000 1.51 +++ kern/sched_bsd.c 25 May 2019 18:53:52 -0000 @@ -343,7 +343,6 @@ mi_switch(void) struct schedstate_percpu *spc = &curcpu()->ci_schedstate; struct proc *p = curproc; struct proc *nextproc; - struct process *pr = p->p_p; struct timespec ts; #ifdef MULTIPROCESSOR int hold_count; @@ -381,11 +380,10 @@ mi_switch(void) #endif } else { timespecsub(&ts, &spc->spc_runtime, &ts); - timespecadd(&p->p_rtime, &ts, &p->p_rtime); } /* add the time counts for this thread to the process's total */ - tuagg_unlocked(pr, p); + tuagg(p, &ts); /* * Process is about to yield the CPU; clear the appropriate Index: kern/tty.c =================================================================== RCS file: /cvs/src/sys/kern/tty.c,v retrieving revision 1.144 diff -u -p -r1.144 tty.c --- kern/tty.c 13 May 2019 19:21:31 -0000 1.144 +++ kern/tty.c 25 May 2019 18:53:53 -0000 @@ -2191,7 +2191,9 @@ update_pickpr: rss = (pickpr->ps_flags & (PS_EMBRYO | PS_ZOMBIE)) ? 0 : vm_resident_count(pickpr->ps_vmspace); + mtx_enter(&pickpr->ps_mtx); calctsru(&pickpr->ps_tu, &utime, &stime, NULL); + mtx_leave(&pickpr->ps_mtx); /* Round up and print user time. */ utime.tv_nsec += 5000000; Index: sys/proc.h =================================================================== RCS file: /cvs/src/sys/sys/proc.h,v retrieving revision 1.265 diff -u -p -r1.265 proc.h --- sys/proc.h 13 May 2019 19:21:31 -0000 1.265 +++ sys/proc.h 25 May 2019 18:53:53 -0000 @@ -154,6 +154,11 @@ RBT_HEAD(unvname_rbt, unvname); struct futex; LIST_HEAD(futex_list, futex); struct unveil; + +/* + * Locks used to protect struct members in this file: + * m this process' `ps_mtx' + */ struct process { /* * ps_mainproc is the original thread in the process. @@ -181,6 +186,7 @@ struct process { struct futex_list ps_ftlist; /* futexes attached to this process */ LIST_HEAD(, kqueue) ps_kqlist; /* kqueues attached to this process */ + struct mutex ps_mtx; /* per-process mutex */ /* The following fields are all zeroed upon creation in process_new. */ #define ps_startzero ps_klist @@ -200,7 +206,7 @@ struct process { struct ptrace_state *ps_ptstat;/* Ptrace state */ struct rusage *ps_ru; /* sum of stats for dead threads. */ - struct tusage ps_tu; /* accumulated times. */ + struct tusage ps_tu; /* [m] accumulated times. */ struct rusage ps_cru; /* sum of stats for reaped children */ struct itimerval ps_timer[3]; /* timers, indexed by ITIMER_* */ struct timeout ps_rucheck_to; /* resource limit check timer */ @@ -301,12 +307,14 @@ struct lock_list_entry; /* * Locks used to protect struct members in this file: * s scheduler lock + * I immutable after creation + * pm parent process `ps_mtx' */ struct proc { TAILQ_ENTRY(proc) p_runq; /* [s] current run/sleep queue */ LIST_ENTRY(proc) p_list; /* List of all threads. */ - struct process *p_p; /* The process of this thread. */ + struct process *p_p; /* [I] The process of this thread. */ TAILQ_ENTRY(proc) p_thr_link; /* Threads in a process linkage. */ TAILQ_ENTRY(proc) p_fut_link; /* Threads in a futex linkage. */ @@ -337,17 +345,17 @@ struct proc { int p_cpticks; /* Ticks of cpu time. */ const volatile void *p_wchan; /* [s] Sleep address. */ struct timeout p_sleep_to;/* timeout for tsleep() */ + struct cpu_info * volatile p_cpu; /* [s] CPU we're running on. */ const char *p_wmesg; /* [s] Reason for sleep. */ fixpt_t p_pctcpu; /* [s] %cpu for this thread */ u_int p_slptime; /* [s] Time since last blocked. */ - u_int p_uticks; /* Statclock hits in user mode. */ - u_int p_sticks; /* Statclock hits in system mode. */ - u_int p_iticks; /* Statclock hits processing intr. */ - struct cpu_info * volatile p_cpu; /* [s] CPU we're running on. */ + u_int p_uticks; /* [pm] Statclock hits in user mode */ + u_int p_sticks; /* [pm] Statclock hits in system mode */ + u_int p_iticks; /* [pm] Statclock hits in intr. */ struct rusage p_ru; /* Statistics */ - struct tusage p_tu; /* accumulated times. */ - struct timespec p_rtime; /* Real time. */ + struct tusage p_tu; /* [pm] accumulated times. */ + struct timespec p_rtime; /* [pm] Real time. */ int p_siglist; /* Signals arrived but not delivered. */ Index: sys/resourcevar.h =================================================================== RCS file: /cvs/src/sys/sys/resourcevar.h,v retrieving revision 1.19 diff -u -p -r1.19 resourcevar.h --- sys/resourcevar.h 6 Jan 2019 12:59:45 -0000 1.19 +++ sys/resourcevar.h 25 May 2019 18:53:53 -0000 @@ -58,8 +58,7 @@ do { \ #ifdef _KERNEL void addupc_intr(struct proc *, u_long); void addupc_task(struct proc *, u_long, u_int); -void tuagg_unlocked(struct process *, struct proc *); -void tuagg(struct process *, struct proc *); +void tuagg(struct proc *, struct timespec *); struct tusage; void calctsru(struct tusage *, struct timespec *, struct timespec *, struct timespec *); Index: sys/sysctl.h =================================================================== RCS file: /cvs/src/sys/sys/sysctl.h,v retrieving revision 1.186 diff -u -p -r1.186 sysctl.h --- sys/sysctl.h 13 May 2019 19:21:31 -0000 1.186 +++ sys/sysctl.h 25 May 2019 18:53:53 -0000 @@ -536,6 +536,14 @@ struct kinfo_vmentry { * p_tpgid, p_tsess, p_vm_rssize, p_u[us]time_{sec,usec}, p_cpuid */ +#if defined(_KERNEL) +#define PR_LOCK(pr) mtx_enter(&(pr)->ps_mtx) +#define PR_UNLOCK(pr) mtx_leave(&(pr)->ps_mtx) +#else +#define PR_LOCK(pr) /* nothing */ +#define PR_UNLOCK(pr) /* nothing */ +#endif + #define PTRTOINT64(_x) ((u_int64_t)(u_long)(_x)) #define FILL_KPROC(kp, copy_str, p, pr, uc, pg, paddr, \ @@ -574,6 +582,7 @@ do { \ (kp)->p_jobc = (pg)->pg_jobc; \ \ (kp)->p_estcpu = (p)->p_estcpu; \ + PR_LOCK(pr); \ if (isthread) { \ (kp)->p_rtime_sec = (p)->p_tu.tu_runtime.tv_sec; \ (kp)->p_rtime_usec = (p)->p_tu.tu_runtime.tv_nsec/1000; \ @@ -589,6 +598,7 @@ do { \ (kp)->p_sticks = (pr)->ps_tu.tu_sticks; \ (kp)->p_iticks = (pr)->ps_tu.tu_iticks; \ } \ + PR_UNLOCK(pr); \ (kp)->p_cpticks = (p)->p_cpticks; \ \ if (show_addresses) \