On Sun, May 12, 2019 at 06:17:04PM -0400, Martin Pieuchot wrote:
> People started complaining that the SCHED_LOCK() is contended.  Here's a
> first round at reducing its scope.
> 
> Diff below introduces a per-process mutex to protect time accounting
> fields accessed in tuagg().  tuagg() is principally called in mi_switch()
> where the SCHED_LOCK() is currently held.  Moving these fields out of
> its scope allows us to drop some SCHED_LOCK/UNLOCK dances in accounting
> path.
> 
> Note that hardclock(9) still increments p_{u,s,i}ticks without holding a
> lock.  I doubt it's worth doing anything so this diff doesn't change
> anything in that regard.
> 
> Comments, oks?
> 

I've been running this for 2 days on my main machine and it seems
solid.

Assuming you're confident you have mutexes in all the right places, ok
mlarkin when you're ready.

-ml


> Index: kern/init_main.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/init_main.c,v
> retrieving revision 1.284
> diff -u -p -r1.284 init_main.c
> --- kern/init_main.c  26 Feb 2019 14:24:21 -0000      1.284
> +++ kern/init_main.c  12 May 2019 21:56:23 -0000
> @@ -518,7 +518,9 @@ main(void *framep)
>               getnanotime(&pr->ps_start);
>               TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link) {
>                       nanouptime(&p->p_cpu->ci_schedstate.spc_runtime);
> +                     mtx_enter(&pr->ps_mtx);
>                       timespecclear(&p->p_rtime);
> +                     mtx_leave(&pr->ps_mtx);
>               }
>       }
>  
> Index: kern/kern_acct.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_acct.c,v
> retrieving revision 1.36
> diff -u -p -r1.36 kern_acct.c
> --- kern/kern_acct.c  28 Apr 2018 03:13:04 -0000      1.36
> +++ kern/kern_acct.c  12 May 2019 21:51:09 -0000
> @@ -179,7 +179,9 @@ acct_process(struct proc *p)
>       memcpy(acct.ac_comm, pr->ps_comm, sizeof acct.ac_comm);
>  
>       /* (2) The amount of user and system time that was used */
> +     mtx_enter(&pr->ps_mtx);
>       calctsru(&pr->ps_tu, &ut, &st, NULL);
> +     mtx_leave(&pr->ps_mtx);
>       acct.ac_utime = encode_comp_t(ut.tv_sec, ut.tv_nsec);
>       acct.ac_stime = encode_comp_t(st.tv_sec, st.tv_nsec);
>  
> Index: kern/kern_exec.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_exec.c,v
> retrieving revision 1.203
> diff -u -p -r1.203 kern_exec.c
> --- kern/kern_exec.c  8 Feb 2019 12:51:57 -0000       1.203
> +++ kern/kern_exec.c  12 May 2019 21:53:56 -0000
> @@ -661,8 +661,10 @@ sys_execve(struct proc *p, void *v, regi
>       }
>  
>       /* reset CPU time usage for the thread, but not the process */
> +     mtx_enter(&pr->ps_mtx);
>       timespecclear(&p->p_tu.tu_runtime);
>       p->p_tu.tu_uticks = p->p_tu.tu_sticks = p->p_tu.tu_iticks = 0;
> +     mtx_leave(&pr->ps_mtx);
>  
>       km_free(argp, NCARGS, &kv_exec, &kp_pageable);
>  
> Index: kern/kern_exit.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_exit.c,v
> retrieving revision 1.173
> diff -u -p -r1.173 kern_exit.c
> --- kern/kern_exit.c  23 Jan 2019 22:39:47 -0000      1.173
> +++ kern/kern_exit.c  12 May 2019 21:51:22 -0000
> @@ -282,7 +282,7 @@ exit1(struct proc *p, int rv, int flags)
>  
>       /* add thread's accumulated rusage into the process's total */
>       ruadd(rup, &p->p_ru);
> -     tuagg(pr, p);
> +     tuagg(p, NULL);
>  
>       /*
>        * clear %cpu usage during swap
> @@ -294,7 +294,9 @@ exit1(struct proc *p, int rv, int flags)
>                * Final thread has died, so add on our children's rusage
>                * and calculate the total times
>                */
> +             mtx_enter(&pr->ps_mtx);
>               calcru(&pr->ps_tu, &rup->ru_utime, &rup->ru_stime, NULL);
> +             mtx_leave(&pr->ps_mtx);
>               ruadd(rup, &pr->ps_cru);
>  
>               /* notify interested parties of our demise and clean up */
> Index: kern/kern_fork.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_fork.c,v
> retrieving revision 1.209
> diff -u -p -r1.209 kern_fork.c
> --- kern/kern_fork.c  6 Jan 2019 12:59:45 -0000       1.209
> +++ kern/kern_fork.c  12 May 2019 21:55:57 -0000
> @@ -55,6 +55,7 @@
>  #include <sys/sysctl.h>
>  #include <sys/pool.h>
>  #include <sys/mman.h>
> +#include <sys/mutex.h>
>  #include <sys/ptrace.h>
>  #include <sys/atomic.h>
>  #include <sys/pledge.h>
> @@ -209,6 +210,8 @@ process_initialize(struct process *pr, s
>       LIST_INIT(&pr->ps_ftlist);
>       LIST_INIT(&pr->ps_kqlist);
>       LIST_INIT(&pr->ps_sigiolst);
> +
> +     mtx_init(&pr->ps_mtx, IPL_MPFLOOR);
>  
>       timeout_set(&pr->ps_realit_to, realitexpire, pr);
>       timeout_set(&pr->ps_rucheck_to, rucheck, pr);
> Index: kern/kern_resource.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_resource.c,v
> retrieving revision 1.59
> diff -u -p -r1.59 kern_resource.c
> --- kern/kern_resource.c      6 Jan 2019 12:59:45 -0000       1.59
> +++ kern/kern_resource.c      12 May 2019 21:54:52 -0000
> @@ -47,13 +47,13 @@
>  #include <sys/ktrace.h>
>  #include <sys/sched.h>
>  #include <sys/signalvar.h>
> -
> +#include <sys/mutex.h>
>  #include <sys/mount.h>
>  #include <sys/syscallargs.h>
>  
>  #include <uvm/uvm_extern.h>
>  
> -void tuagg_sub(struct tusage *, struct proc *);
> +void tuagg_sub(struct tusage *, struct tusage *);
>  
>  /*
>   * Patchable maximum data and stack limits.
> @@ -338,12 +338,13 @@ sys_getrlimit(struct proc *p, void *v, r
>  }
>  
>  void
> -tuagg_sub(struct tusage *tup, struct proc *p)
> +tuagg_sub(struct tusage *ttup, struct tusage *ftup)
>  {
> -     timespecadd(&tup->tu_runtime, &p->p_rtime, &tup->tu_runtime);
> -     tup->tu_uticks += p->p_uticks;
> -     tup->tu_sticks += p->p_sticks;
> -     tup->tu_iticks += p->p_iticks;
> +
> +     timespecadd(&ttup->tu_runtime, &ftup->tu_runtime, &ttup->tu_runtime);
> +     ttup->tu_uticks += ftup->tu_uticks;
> +     ttup->tu_sticks += ftup->tu_sticks;
> +     ttup->tu_iticks += ftup->tu_iticks;
>  }
>  
>  /*
> @@ -351,24 +352,28 @@ tuagg_sub(struct tusage *tup, struct pro
>   * totals for the thread and process
>   */
>  void
> -tuagg_unlocked(struct process *pr, struct proc *p)
> +tuagg(struct proc *p, struct timespec *tsp)
>  {
> -     tuagg_sub(&pr->ps_tu, p);
> -     tuagg_sub(&p->p_tu, p);
> -     timespecclear(&p->p_rtime);
> +     struct process *pr = p->p_p;
> +     struct tusage tu;
> +
> +     memset(&tu, 0, sizeof(tu));
> +
> +     mtx_enter(&pr->ps_mtx);
> +     tu.tu_uticks = p->p_uticks;
> +     tu.tu_sticks = p->p_sticks;
> +     tu.tu_iticks = p->p_iticks;
>       p->p_uticks = 0;
>       p->p_sticks = 0;
>       p->p_iticks = 0;
> -}
> -
> -void
> -tuagg(struct process *pr, struct proc *p)
> -{
> -     int s;
> +     if (tsp != NULL)
> +             timespecadd(&p->p_rtime, tsp, &p->p_rtime);
> +     timespecadd(&tu.tu_runtime, &p->p_rtime, &tu.tu_runtime);
> +     timespecclear(&p->p_rtime);
> +     tuagg_sub(&pr->ps_tu, &tu);
> +     tuagg_sub(&p->p_tu, &tu);
> +     mtx_leave(&pr->ps_mtx);
>  
> -     SCHED_LOCK(s);
> -     tuagg_unlocked(pr, p);
> -     SCHED_UNLOCK(s);
>  }
>  
>  /*
> @@ -461,15 +466,19 @@ dogetrusage(struct proc *p, int who, str
>               /* add on all living threads */
>               TAILQ_FOREACH(q, &pr->ps_threads, p_thr_link) {
>                       ruadd(rup, &q->p_ru);
> -                     tuagg(pr, q);
> +                     tuagg(q, NULL);
>               }
>  
> +             mtx_enter(&pr->ps_mtx);
>               calcru(&pr->ps_tu, &rup->ru_utime, &rup->ru_stime, NULL);
> +             mtx_leave(&pr->ps_mtx);
>               break;
>  
>       case RUSAGE_THREAD:
>               *rup = p->p_ru;
> +             mtx_enter(&pr->ps_mtx);
>               calcru(&p->p_tu, &rup->ru_utime, &rup->ru_stime, NULL);
> +             mtx_leave(&pr->ps_mtx);
>               break;
>  
>       case RUSAGE_CHILDREN:
> @@ -507,13 +516,12 @@ rucheck(void *arg)
>       struct process *pr = arg;
>       struct rlimit *rlim;
>       rlim_t runtime;
> -     int s;
>  
>       KERNEL_ASSERT_LOCKED();
>  
> -     SCHED_LOCK(s);
> +     mtx_enter(&pr->ps_mtx);
>       runtime = pr->ps_tu.tu_runtime.tv_sec;
> -     SCHED_UNLOCK(s);
> +     mtx_leave(&pr->ps_mtx);
>  
>       rlim = &pr->ps_limit->pl_rlimit[RLIMIT_CPU];
>       if (runtime >= rlim->rlim_cur) {
> Index: kern/kern_sched.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_sched.c,v
> retrieving revision 1.56
> diff -u -p -r1.56 kern_sched.c
> --- kern/kern_sched.c 26 Mar 2019 04:24:22 -0000      1.56
> +++ kern/kern_sched.c 12 May 2019 21:55:31 -0000
> @@ -212,13 +212,17 @@ void
>  sched_exit(struct proc *p)
>  {
>       struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
> +     struct process *pr = p->p_p;
>       struct timespec ts;
>       struct proc *idle;
>       int s;
>  
>       nanouptime(&ts);
>       timespecsub(&ts, &spc->spc_runtime, &ts);
> +
> +     mtx_enter(&pr->ps_mtx);
>       timespecadd(&p->p_rtime, &ts, &p->p_rtime);
> +     mtx_leave(&pr->ps_mtx);
>  
>       LIST_INSERT_HEAD(&spc->spc_deadproc, p, p_hash);
>  
> Index: kern/kern_sysctl.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_sysctl.c,v
> retrieving revision 1.355
> diff -u -p -r1.355 kern_sysctl.c
> --- kern/kern_sysctl.c        9 May 2019 14:59:30 -0000       1.355
> +++ kern/kern_sysctl.c        12 May 2019 21:52:33 -0000
> @@ -1641,7 +1641,9 @@ fill_kproc(struct process *pr, struct ki
>       if ((pr->ps_flags & PS_ZOMBIE) == 0) {
>               if ((pr->ps_flags & PS_EMBRYO) == 0 && vm != NULL)
>                       ki->p_vm_rssize = vm_resident_count(vm);
> +             mtx_enter(&pr->ps_mtx);
>               calctsru(isthread ? &p->p_tu : &pr->ps_tu, &ut, &st, NULL);
> +             mtx_leave(&pr->ps_mtx);
>               ki->p_uutime_sec = ut.tv_sec;
>               ki->p_uutime_usec = ut.tv_nsec/1000;
>               ki->p_ustime_sec = st.tv_sec;
> Index: kern/kern_time.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_time.c,v
> retrieving revision 1.115
> diff -u -p -r1.115 kern_time.c
> --- kern/kern_time.c  9 May 2019 20:30:21 -0000       1.115
> +++ kern/kern_time.c  12 May 2019 21:59:55 -0000
> @@ -106,6 +106,7 @@ settime(const struct timespec *ts)
>  int
>  clock_gettime(struct proc *p, clockid_t clock_id, struct timespec *tp)
>  {
> +     struct process *pr = p->p_p;
>       struct bintime bt;
>       struct proc *q;
>       int error = 0;
> @@ -126,14 +127,18 @@ clock_gettime(struct proc *p, clockid_t 
>       case CLOCK_PROCESS_CPUTIME_ID:
>               nanouptime(tp);
>               timespecsub(tp, &curcpu()->ci_schedstate.spc_runtime, tp);
> -             timespecadd(tp, &p->p_p->ps_tu.tu_runtime, tp);
> +             mtx_enter(&pr->ps_mtx);
> +             timespecadd(tp, &pr->ps_tu.tu_runtime, tp);
>               timespecadd(tp, &p->p_rtime, tp);
> +             mtx_leave(&pr->ps_mtx);
>               break;
>       case CLOCK_THREAD_CPUTIME_ID:
>               nanouptime(tp);
>               timespecsub(tp, &curcpu()->ci_schedstate.spc_runtime, tp);
> +             mtx_enter(&pr->ps_mtx);
>               timespecadd(tp, &p->p_tu.tu_runtime, tp);
>               timespecadd(tp, &p->p_rtime, tp);
> +             mtx_leave(&pr->ps_mtx);
>               break;
>       default:
>               /* check for clock from pthread_getcpuclockid() */
> @@ -142,8 +147,11 @@ clock_gettime(struct proc *p, clockid_t 
>                       q = tfind(__CLOCK_PTID(clock_id) - THREAD_PID_OFFSET);
>                       if (q == NULL || q->p_p != p->p_p)
>                               error = ESRCH;
> -                     else
> +                     else {
> +                             mtx_enter(&pr->ps_mtx);
>                               *tp = q->p_tu.tu_runtime;
> +                             mtx_leave(&pr->ps_mtx);
> +                     }
>                       KERNEL_UNLOCK();
>               } else
>                       error = EINVAL;
> Index: kern/sched_bsd.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/sched_bsd.c,v
> retrieving revision 1.50
> diff -u -p -r1.50 sched_bsd.c
> --- kern/sched_bsd.c  26 Feb 2019 14:24:21 -0000      1.50
> +++ kern/sched_bsd.c  12 May 2019 21:35:18 -0000
> @@ -343,7 +343,6 @@ mi_switch(void)
>       struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
>       struct proc *p = curproc;
>       struct proc *nextproc;
> -     struct process *pr = p->p_p;
>       struct timespec ts;
>  #ifdef MULTIPROCESSOR
>       int hold_count;
> @@ -381,11 +380,10 @@ mi_switch(void)
>  #endif
>       } else {
>               timespecsub(&ts, &spc->spc_runtime, &ts);
> -             timespecadd(&p->p_rtime, &ts, &p->p_rtime);
>       }
>  
>       /* add the time counts for this thread to the process's total */
> -     tuagg_unlocked(pr, p);
> +     tuagg(p, &ts);
>  
>       /*
>        * Process is about to yield the CPU; clear the appropriate
> Index: kern/tty.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/tty.c,v
> retrieving revision 1.143
> diff -u -p -r1.143 tty.c
> --- kern/tty.c        6 Sep 2018 11:50:54 -0000       1.143
> +++ kern/tty.c        12 May 2019 21:52:55 -0000
> @@ -2191,7 +2191,9 @@ update_pickpr:
>               rss = (pickpr->ps_flags & (PS_EMBRYO | PS_ZOMBIE)) ? 0 :
>                   vm_resident_count(pickpr->ps_vmspace);
>  
> +             mtx_enter(&pickpr->ps_mtx);
>               calctsru(&pickpr->ps_tu, &utime, &stime, NULL);
> +             mtx_leave(&pickpr->ps_mtx);
>  
>               /* Round up and print user time. */
>               utime.tv_nsec += 5000000;
> Index: sys/proc.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/proc.h,v
> retrieving revision 1.264
> diff -u -p -r1.264 proc.h
> --- sys/proc.h        12 May 2019 13:18:58 -0000      1.264
> +++ sys/proc.h        12 May 2019 22:01:52 -0000
> @@ -154,6 +154,11 @@ RBT_HEAD(unvname_rbt, unvname);
>  struct futex;
>  LIST_HEAD(futex_list, futex);
>  struct unveil;
> +
> +/*
> + *  Locks used to protect struct members in this file:
> + *   m       this process' `ps_mtx'
> + */
>  struct process {
>       /*
>        * ps_mainproc is the original thread in the process.
> @@ -181,6 +186,7 @@ struct process {
>  
>       struct  futex_list ps_ftlist;   /* futexes attached to this process */
>       LIST_HEAD(, kqueue) ps_kqlist;  /* kqueues attached to this process */
> +     struct  mutex   ps_mtx;         /* per-process mutex */
>  
>  /* The following fields are all zeroed upon creation in process_new. */
>  #define      ps_startzero    ps_klist
> @@ -199,7 +205,7 @@ struct process {
>       struct  ptrace_state *ps_ptstat;/* Ptrace state */
>  
>       struct  rusage *ps_ru;          /* sum of stats for dead threads. */
> -     struct  tusage ps_tu;           /* accumulated times. */
> +     struct  tusage ps_tu;           /* [m] accumulated times. */
>       struct  rusage ps_cru;          /* sum of stats for reaped children */
>       struct  itimerval ps_timer[3];  /* timers, indexed by ITIMER_* */
>       struct  timeout ps_rucheck_to;  /* resource limit check timer */
> @@ -300,12 +306,14 @@ struct lock_list_entry;
>  /*
>   *  Locks used to protect struct members in this file:
>   *   s       scheduler lock
> + *   I       immutable after creation
> + *   pm      parent process `ps_mtx'
>   */
>  struct proc {
>       TAILQ_ENTRY(proc) p_runq;       /* [s] current run/sleep queue */
>       LIST_ENTRY(proc) p_list;        /* List of all threads. */
>  
> -     struct  process *p_p;           /* The process of this thread. */
> +     struct  process *p_p;           /* [I] The process of this thread. */
>       TAILQ_ENTRY(proc) p_thr_link;   /* Threads in a process linkage. */
>  
>       TAILQ_ENTRY(proc) p_fut_link;   /* Threads in a futex linkage. */
> @@ -336,17 +344,17 @@ struct proc {
>       int     p_cpticks;       /* Ticks of cpu time. */
>       const volatile void *p_wchan;   /* [s] Sleep address. */
>       struct  timeout p_sleep_to;/* timeout for tsleep() */
> +     struct  cpu_info * volatile p_cpu; /* [s] CPU we're running on. */
>       const char *p_wmesg;            /* [s] Reason for sleep. */
>       fixpt_t p_pctcpu;               /* [s] %cpu for this thread */
>       u_int   p_slptime;              /* [s] Time since last blocked. */
> -     u_int   p_uticks;               /* Statclock hits in user mode. */
> -     u_int   p_sticks;               /* Statclock hits in system mode. */
> -     u_int   p_iticks;               /* Statclock hits processing intr. */
> -     struct  cpu_info * volatile p_cpu; /* [s] CPU we're running on. */
> +     u_int   p_uticks;               /* [pm] Statclock hits in user mode */
> +     u_int   p_sticks;               /* [pm] Statclock hits in system mode */
> +     u_int   p_iticks;               /* [pm] Statclock hits in intr. */
>  
>       struct  rusage p_ru;            /* Statistics */
> -     struct  tusage p_tu;            /* accumulated times. */
> -     struct  timespec p_rtime;       /* Real time. */
> +     struct  tusage p_tu;            /* [pm] accumulated times. */
> +     struct  timespec p_rtime;       /* [pm] Real time. */
>  
>       int      p_siglist;             /* Signals arrived but not delivered. */
>  
> Index: sys/resourcevar.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/resourcevar.h,v
> retrieving revision 1.19
> diff -u -p -r1.19 resourcevar.h
> --- sys/resourcevar.h 6 Jan 2019 12:59:45 -0000       1.19
> +++ sys/resourcevar.h 12 May 2019 21:33:45 -0000
> @@ -58,8 +58,7 @@ do {                                                        
>                 \
>  #ifdef _KERNEL
>  void  addupc_intr(struct proc *, u_long);
>  void  addupc_task(struct proc *, u_long, u_int);
> -void  tuagg_unlocked(struct process *, struct proc *);
> -void  tuagg(struct process *, struct proc *);
> +void  tuagg(struct proc *, struct timespec *);
>  struct tusage;
>  void  calctsru(struct tusage *, struct timespec *, struct timespec *,
>           struct timespec *);
> Index: sys/sysctl.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/sysctl.h,v
> retrieving revision 1.185
> diff -u -p -r1.185 sysctl.h
> --- sys/sysctl.h      9 May 2019 14:59:30 -0000       1.185
> +++ sys/sysctl.h      12 May 2019 22:05:05 -0000
> @@ -574,6 +574,7 @@ do {                                                      
>                 \
>       (kp)->p_jobc = (pg)->pg_jobc;                                   \
>                                                                       \
>       (kp)->p_estcpu = (p)->p_estcpu;                                 \
> +     mtx_enter(&(pr)->ps_mtx);                                       \
>       if (isthread) {                                                 \
>               (kp)->p_rtime_sec = (p)->p_tu.tu_runtime.tv_sec;        \
>               (kp)->p_rtime_usec = (p)->p_tu.tu_runtime.tv_nsec/1000; \
> @@ -589,6 +590,7 @@ do {                                                      
>                 \
>               (kp)->p_sticks = (pr)->ps_tu.tu_sticks;                 \
>               (kp)->p_iticks = (pr)->ps_tu.tu_iticks;                 \
>       }                                                               \
> +     mtx_leave(&(pr)->ps_mtx);                                       \
>       (kp)->p_cpticks = (p)->p_cpticks;                               \
>                                                                       \
>       if (show_addresses)                                             \
> 

Reply via email to