On 12/05/19(Sun) 18:17, Martin Pieuchot wrote:
> People started complaining that the SCHED_LOCK() is contended.  Here's a
> first round at reducing its scope.
> 
> Diff below introduces a per-process mutex to protect time accounting
> fields accessed in tuagg().  tuagg() is principally called in mi_switch()
> where the SCHED_LOCK() is currently held.  Moving these fields out of
> its scope allows us to drop some SCHED_LOCK/UNLOCK dances in accounting
> path.
> 
> Note that hardclock(9) still increments p_{u,s,i}ticks without holding a
> lock.  I doubt it's worth doing anything so this diff doesn't change
> anything in that regard.

Updated diff:

- Use struct assignment instead of timespecadd() to initialize
  `tu_runtime', pointed out by visa@.
- Do not use mtx_enter(9)/leave(9) in libkvm's version of FILL_PROC(),
  pointed out by guenther@ and lteo@.

Oks?

Index: kern/init_main.c
===================================================================
RCS file: /cvs/src/sys/kern/init_main.c,v
retrieving revision 1.284
diff -u -p -r1.284 init_main.c
--- kern/init_main.c    26 Feb 2019 14:24:21 -0000      1.284
+++ kern/init_main.c    25 May 2019 18:53:51 -0000
@@ -518,7 +518,9 @@ main(void *framep)
                getnanotime(&pr->ps_start);
                TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link) {
                        nanouptime(&p->p_cpu->ci_schedstate.spc_runtime);
+                       mtx_enter(&pr->ps_mtx);
                        timespecclear(&p->p_rtime);
+                       mtx_leave(&pr->ps_mtx);
                }
        }
 
Index: kern/kern_acct.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_acct.c,v
retrieving revision 1.36
diff -u -p -r1.36 kern_acct.c
--- kern/kern_acct.c    28 Apr 2018 03:13:04 -0000      1.36
+++ kern/kern_acct.c    25 May 2019 18:53:51 -0000
@@ -179,7 +179,9 @@ acct_process(struct proc *p)
        memcpy(acct.ac_comm, pr->ps_comm, sizeof acct.ac_comm);
 
        /* (2) The amount of user and system time that was used */
+       mtx_enter(&pr->ps_mtx);
        calctsru(&pr->ps_tu, &ut, &st, NULL);
+       mtx_leave(&pr->ps_mtx);
        acct.ac_utime = encode_comp_t(ut.tv_sec, ut.tv_nsec);
        acct.ac_stime = encode_comp_t(st.tv_sec, st.tv_nsec);
 
Index: kern/kern_exec.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_exec.c,v
retrieving revision 1.203
diff -u -p -r1.203 kern_exec.c
--- kern/kern_exec.c    8 Feb 2019 12:51:57 -0000       1.203
+++ kern/kern_exec.c    25 May 2019 18:53:51 -0000
@@ -661,8 +661,10 @@ sys_execve(struct proc *p, void *v, regi
        }
 
        /* reset CPU time usage for the thread, but not the process */
+       mtx_enter(&pr->ps_mtx);
        timespecclear(&p->p_tu.tu_runtime);
        p->p_tu.tu_uticks = p->p_tu.tu_sticks = p->p_tu.tu_iticks = 0;
+       mtx_leave(&pr->ps_mtx);
 
        km_free(argp, NCARGS, &kv_exec, &kp_pageable);
 
Index: kern/kern_exit.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_exit.c,v
retrieving revision 1.174
diff -u -p -r1.174 kern_exit.c
--- kern/kern_exit.c    13 May 2019 19:21:31 -0000      1.174
+++ kern/kern_exit.c    25 May 2019 18:53:52 -0000
@@ -284,7 +284,7 @@ exit1(struct proc *p, int rv, int flags)
 
        /* add thread's accumulated rusage into the process's total */
        ruadd(rup, &p->p_ru);
-       tuagg(pr, p);
+       tuagg(p, NULL);
 
        /*
         * clear %cpu usage during swap
@@ -296,7 +296,9 @@ exit1(struct proc *p, int rv, int flags)
                 * Final thread has died, so add on our children's rusage
                 * and calculate the total times
                 */
+               mtx_enter(&pr->ps_mtx);
                calcru(&pr->ps_tu, &rup->ru_utime, &rup->ru_stime, NULL);
+               mtx_leave(&pr->ps_mtx);
                ruadd(rup, &pr->ps_cru);
 
                /* notify interested parties of our demise and clean up */
Index: kern/kern_fork.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_fork.c,v
retrieving revision 1.209
diff -u -p -r1.209 kern_fork.c
--- kern/kern_fork.c    6 Jan 2019 12:59:45 -0000       1.209
+++ kern/kern_fork.c    25 May 2019 18:53:52 -0000
@@ -55,6 +55,7 @@
 #include <sys/sysctl.h>
 #include <sys/pool.h>
 #include <sys/mman.h>
+#include <sys/mutex.h>
 #include <sys/ptrace.h>
 #include <sys/atomic.h>
 #include <sys/pledge.h>
@@ -209,6 +210,8 @@ process_initialize(struct process *pr, s
        LIST_INIT(&pr->ps_ftlist);
        LIST_INIT(&pr->ps_kqlist);
        LIST_INIT(&pr->ps_sigiolst);
+
+       mtx_init(&pr->ps_mtx, IPL_MPFLOOR);
 
        timeout_set(&pr->ps_realit_to, realitexpire, pr);
        timeout_set(&pr->ps_rucheck_to, rucheck, pr);
Index: kern/kern_resource.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_resource.c,v
retrieving revision 1.59
diff -u -p -r1.59 kern_resource.c
--- kern/kern_resource.c        6 Jan 2019 12:59:45 -0000       1.59
+++ kern/kern_resource.c        25 May 2019 18:53:52 -0000
@@ -47,13 +47,13 @@
 #include <sys/ktrace.h>
 #include <sys/sched.h>
 #include <sys/signalvar.h>
-
+#include <sys/mutex.h>
 #include <sys/mount.h>
 #include <sys/syscallargs.h>
 
 #include <uvm/uvm_extern.h>
 
-void   tuagg_sub(struct tusage *, struct proc *);
+void   tuagg_sub(struct tusage *, struct tusage *);
 
 /*
  * Patchable maximum data and stack limits.
@@ -338,12 +338,12 @@ sys_getrlimit(struct proc *p, void *v, r
 }
 
 void
-tuagg_sub(struct tusage *tup, struct proc *p)
+tuagg_sub(struct tusage *ttup, struct tusage *ftup)
 {
-       timespecadd(&tup->tu_runtime, &p->p_rtime, &tup->tu_runtime);
-       tup->tu_uticks += p->p_uticks;
-       tup->tu_sticks += p->p_sticks;
-       tup->tu_iticks += p->p_iticks;
+       timespecadd(&ttup->tu_runtime, &ftup->tu_runtime, &ttup->tu_runtime);
+       ttup->tu_uticks += ftup->tu_uticks;
+       ttup->tu_sticks += ftup->tu_sticks;
+       ttup->tu_iticks += ftup->tu_iticks;
 }
 
 /*
@@ -351,24 +351,26 @@ tuagg_sub(struct tusage *tup, struct pro
  * totals for the thread and process
  */
 void
-tuagg_unlocked(struct process *pr, struct proc *p)
+tuagg(struct proc *p, struct timespec *tsp)
 {
-       tuagg_sub(&pr->ps_tu, p);
-       tuagg_sub(&p->p_tu, p);
-       timespecclear(&p->p_rtime);
+       struct process *pr = p->p_p;
+       struct tusage tu;
+
+       mtx_enter(&pr->ps_mtx);
+       tu.tu_uticks = p->p_uticks;
+       tu.tu_sticks = p->p_sticks;
+       tu.tu_iticks = p->p_iticks;
        p->p_uticks = 0;
        p->p_sticks = 0;
        p->p_iticks = 0;
-}
-
-void
-tuagg(struct process *pr, struct proc *p)
-{
-       int s;
+       if (tsp != NULL)
+               timespecadd(&p->p_rtime, tsp, &p->p_rtime);
+       tu.tu_runtime = p->p_rtime;
+       timespecclear(&p->p_rtime);
+       tuagg_sub(&pr->ps_tu, &tu);
+       tuagg_sub(&p->p_tu, &tu);
+       mtx_leave(&pr->ps_mtx);
 
-       SCHED_LOCK(s);
-       tuagg_unlocked(pr, p);
-       SCHED_UNLOCK(s);
 }
 
 /*
@@ -461,15 +463,19 @@ dogetrusage(struct proc *p, int who, str
                /* add on all living threads */
                TAILQ_FOREACH(q, &pr->ps_threads, p_thr_link) {
                        ruadd(rup, &q->p_ru);
-                       tuagg(pr, q);
+                       tuagg(q, NULL);
                }
 
+               mtx_enter(&pr->ps_mtx);
                calcru(&pr->ps_tu, &rup->ru_utime, &rup->ru_stime, NULL);
+               mtx_leave(&pr->ps_mtx);
                break;
 
        case RUSAGE_THREAD:
                *rup = p->p_ru;
+               mtx_enter(&pr->ps_mtx);
                calcru(&p->p_tu, &rup->ru_utime, &rup->ru_stime, NULL);
+               mtx_leave(&pr->ps_mtx);
                break;
 
        case RUSAGE_CHILDREN:
@@ -507,13 +513,12 @@ rucheck(void *arg)
        struct process *pr = arg;
        struct rlimit *rlim;
        rlim_t runtime;
-       int s;
 
        KERNEL_ASSERT_LOCKED();
 
-       SCHED_LOCK(s);
+       mtx_enter(&pr->ps_mtx);
        runtime = pr->ps_tu.tu_runtime.tv_sec;
-       SCHED_UNLOCK(s);
+       mtx_leave(&pr->ps_mtx);
 
        rlim = &pr->ps_limit->pl_rlimit[RLIMIT_CPU];
        if (runtime >= rlim->rlim_cur) {
Index: kern/kern_sched.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sched.c,v
retrieving revision 1.56
diff -u -p -r1.56 kern_sched.c
--- kern/kern_sched.c   26 Mar 2019 04:24:22 -0000      1.56
+++ kern/kern_sched.c   25 May 2019 18:53:52 -0000
@@ -212,13 +212,17 @@ void
 sched_exit(struct proc *p)
 {
        struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
+       struct process *pr = p->p_p;
        struct timespec ts;
        struct proc *idle;
        int s;
 
        nanouptime(&ts);
        timespecsub(&ts, &spc->spc_runtime, &ts);
+
+       mtx_enter(&pr->ps_mtx);
        timespecadd(&p->p_rtime, &ts, &p->p_rtime);
+       mtx_leave(&pr->ps_mtx);
 
        LIST_INSERT_HEAD(&spc->spc_deadproc, p, p_hash);
 
Index: kern/kern_sysctl.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sysctl.c,v
retrieving revision 1.355
diff -u -p -r1.355 kern_sysctl.c
--- kern/kern_sysctl.c  9 May 2019 14:59:30 -0000       1.355
+++ kern/kern_sysctl.c  25 May 2019 18:53:52 -0000
@@ -1641,7 +1641,9 @@ fill_kproc(struct process *pr, struct ki
        if ((pr->ps_flags & PS_ZOMBIE) == 0) {
                if ((pr->ps_flags & PS_EMBRYO) == 0 && vm != NULL)
                        ki->p_vm_rssize = vm_resident_count(vm);
+               mtx_enter(&pr->ps_mtx);
                calctsru(isthread ? &p->p_tu : &pr->ps_tu, &ut, &st, NULL);
+               mtx_leave(&pr->ps_mtx);
                ki->p_uutime_sec = ut.tv_sec;
                ki->p_uutime_usec = ut.tv_nsec/1000;
                ki->p_ustime_sec = st.tv_sec;
Index: kern/kern_time.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_time.c,v
retrieving revision 1.116
diff -u -p -r1.116 kern_time.c
--- kern/kern_time.c    21 May 2019 08:30:35 -0000      1.116
+++ kern/kern_time.c    25 May 2019 18:53:52 -0000
@@ -106,6 +106,7 @@ settime(const struct timespec *ts)
 int
 clock_gettime(struct proc *p, clockid_t clock_id, struct timespec *tp)
 {
+       struct process *pr = p->p_p;
        struct bintime bt;
        struct proc *q;
        int error = 0;
@@ -126,14 +127,18 @@ clock_gettime(struct proc *p, clockid_t 
        case CLOCK_PROCESS_CPUTIME_ID:
                nanouptime(tp);
                timespecsub(tp, &curcpu()->ci_schedstate.spc_runtime, tp);
-               timespecadd(tp, &p->p_p->ps_tu.tu_runtime, tp);
+               mtx_enter(&pr->ps_mtx);
+               timespecadd(tp, &pr->ps_tu.tu_runtime, tp);
                timespecadd(tp, &p->p_rtime, tp);
+               mtx_leave(&pr->ps_mtx);
                break;
        case CLOCK_THREAD_CPUTIME_ID:
                nanouptime(tp);
                timespecsub(tp, &curcpu()->ci_schedstate.spc_runtime, tp);
+               mtx_enter(&pr->ps_mtx);
                timespecadd(tp, &p->p_tu.tu_runtime, tp);
                timespecadd(tp, &p->p_rtime, tp);
+               mtx_leave(&pr->ps_mtx);
                break;
        default:
                /* check for clock from pthread_getcpuclockid() */
@@ -142,8 +147,11 @@ clock_gettime(struct proc *p, clockid_t 
                        q = tfind(__CLOCK_PTID(clock_id) - THREAD_PID_OFFSET);
                        if (q == NULL || q->p_p != p->p_p)
                                error = ESRCH;
-                       else
+                       else {
+                               mtx_enter(&pr->ps_mtx);
                                *tp = q->p_tu.tu_runtime;
+                               mtx_leave(&pr->ps_mtx);
+                       }
                        KERNEL_UNLOCK();
                } else
                        error = EINVAL;
Index: kern/sched_bsd.c
===================================================================
RCS file: /cvs/src/sys/kern/sched_bsd.c,v
retrieving revision 1.51
diff -u -p -r1.51 sched_bsd.c
--- kern/sched_bsd.c    25 May 2019 18:11:10 -0000      1.51
+++ kern/sched_bsd.c    25 May 2019 18:53:52 -0000
@@ -343,7 +343,6 @@ mi_switch(void)
        struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
        struct proc *p = curproc;
        struct proc *nextproc;
-       struct process *pr = p->p_p;
        struct timespec ts;
 #ifdef MULTIPROCESSOR
        int hold_count;
@@ -381,11 +380,10 @@ mi_switch(void)
 #endif
        } else {
                timespecsub(&ts, &spc->spc_runtime, &ts);
-               timespecadd(&p->p_rtime, &ts, &p->p_rtime);
        }
 
        /* add the time counts for this thread to the process's total */
-       tuagg_unlocked(pr, p);
+       tuagg(p, &ts);
 
        /*
         * Process is about to yield the CPU; clear the appropriate
Index: kern/tty.c
===================================================================
RCS file: /cvs/src/sys/kern/tty.c,v
retrieving revision 1.144
diff -u -p -r1.144 tty.c
--- kern/tty.c  13 May 2019 19:21:31 -0000      1.144
+++ kern/tty.c  25 May 2019 18:53:53 -0000
@@ -2191,7 +2191,9 @@ update_pickpr:
                rss = (pickpr->ps_flags & (PS_EMBRYO | PS_ZOMBIE)) ? 0 :
                    vm_resident_count(pickpr->ps_vmspace);
 
+               mtx_enter(&pickpr->ps_mtx);
                calctsru(&pickpr->ps_tu, &utime, &stime, NULL);
+               mtx_leave(&pickpr->ps_mtx);
 
                /* Round up and print user time. */
                utime.tv_nsec += 5000000;
Index: sys/proc.h
===================================================================
RCS file: /cvs/src/sys/sys/proc.h,v
retrieving revision 1.265
diff -u -p -r1.265 proc.h
--- sys/proc.h  13 May 2019 19:21:31 -0000      1.265
+++ sys/proc.h  25 May 2019 18:53:53 -0000
@@ -154,6 +154,11 @@ RBT_HEAD(unvname_rbt, unvname);
 struct futex;
 LIST_HEAD(futex_list, futex);
 struct unveil;
+
+/*
+ *  Locks used to protect struct members in this file:
+ *     m       this process' `ps_mtx'
+ */
 struct process {
        /*
         * ps_mainproc is the original thread in the process.
@@ -181,6 +186,7 @@ struct process {
 
        struct  futex_list ps_ftlist;   /* futexes attached to this process */
        LIST_HEAD(, kqueue) ps_kqlist;  /* kqueues attached to this process */
+       struct  mutex   ps_mtx;         /* per-process mutex */
 
 /* The following fields are all zeroed upon creation in process_new. */
 #define        ps_startzero    ps_klist
@@ -200,7 +206,7 @@ struct process {
        struct  ptrace_state *ps_ptstat;/* Ptrace state */
 
        struct  rusage *ps_ru;          /* sum of stats for dead threads. */
-       struct  tusage ps_tu;           /* accumulated times. */
+       struct  tusage ps_tu;           /* [m] accumulated times. */
        struct  rusage ps_cru;          /* sum of stats for reaped children */
        struct  itimerval ps_timer[3];  /* timers, indexed by ITIMER_* */
        struct  timeout ps_rucheck_to;  /* resource limit check timer */
@@ -301,12 +307,14 @@ struct lock_list_entry;
 /*
  *  Locks used to protect struct members in this file:
  *     s       scheduler lock
+ *     I       immutable after creation
+ *     pm      parent process `ps_mtx'
  */
 struct proc {
        TAILQ_ENTRY(proc) p_runq;       /* [s] current run/sleep queue */
        LIST_ENTRY(proc) p_list;        /* List of all threads. */
 
-       struct  process *p_p;           /* The process of this thread. */
+       struct  process *p_p;           /* [I] The process of this thread. */
        TAILQ_ENTRY(proc) p_thr_link;   /* Threads in a process linkage. */
 
        TAILQ_ENTRY(proc) p_fut_link;   /* Threads in a futex linkage. */
@@ -337,17 +345,17 @@ struct proc {
        int     p_cpticks;       /* Ticks of cpu time. */
        const volatile void *p_wchan;   /* [s] Sleep address. */
        struct  timeout p_sleep_to;/* timeout for tsleep() */
+       struct  cpu_info * volatile p_cpu; /* [s] CPU we're running on. */
        const char *p_wmesg;            /* [s] Reason for sleep. */
        fixpt_t p_pctcpu;               /* [s] %cpu for this thread */
        u_int   p_slptime;              /* [s] Time since last blocked. */
-       u_int   p_uticks;               /* Statclock hits in user mode. */
-       u_int   p_sticks;               /* Statclock hits in system mode. */
-       u_int   p_iticks;               /* Statclock hits processing intr. */
-       struct  cpu_info * volatile p_cpu; /* [s] CPU we're running on. */
+       u_int   p_uticks;               /* [pm] Statclock hits in user mode */
+       u_int   p_sticks;               /* [pm] Statclock hits in system mode */
+       u_int   p_iticks;               /* [pm] Statclock hits in intr. */
 
        struct  rusage p_ru;            /* Statistics */
-       struct  tusage p_tu;            /* accumulated times. */
-       struct  timespec p_rtime;       /* Real time. */
+       struct  tusage p_tu;            /* [pm] accumulated times. */
+       struct  timespec p_rtime;       /* [pm] Real time. */
 
        int      p_siglist;             /* Signals arrived but not delivered. */
 
Index: sys/resourcevar.h
===================================================================
RCS file: /cvs/src/sys/sys/resourcevar.h,v
retrieving revision 1.19
diff -u -p -r1.19 resourcevar.h
--- sys/resourcevar.h   6 Jan 2019 12:59:45 -0000       1.19
+++ sys/resourcevar.h   25 May 2019 18:53:53 -0000
@@ -58,8 +58,7 @@ do {                                                          
        \
 #ifdef _KERNEL
 void    addupc_intr(struct proc *, u_long);
 void    addupc_task(struct proc *, u_long, u_int);
-void    tuagg_unlocked(struct process *, struct proc *);
-void    tuagg(struct process *, struct proc *);
+void    tuagg(struct proc *, struct timespec *);
 struct tusage;
 void    calctsru(struct tusage *, struct timespec *, struct timespec *,
            struct timespec *);
Index: sys/sysctl.h
===================================================================
RCS file: /cvs/src/sys/sys/sysctl.h,v
retrieving revision 1.186
diff -u -p -r1.186 sysctl.h
--- sys/sysctl.h        13 May 2019 19:21:31 -0000      1.186
+++ sys/sysctl.h        25 May 2019 18:53:53 -0000
@@ -536,6 +536,14 @@ struct kinfo_vmentry {
  * p_tpgid, p_tsess, p_vm_rssize, p_u[us]time_{sec,usec}, p_cpuid
  */
 
+#if defined(_KERNEL)
+#define PR_LOCK(pr)    mtx_enter(&(pr)->ps_mtx)
+#define PR_UNLOCK(pr)  mtx_leave(&(pr)->ps_mtx)
+#else
+#define PR_LOCK(pr)    /* nothing */
+#define PR_UNLOCK(pr)  /* nothing */
+#endif
+
 #define PTRTOINT64(_x) ((u_int64_t)(u_long)(_x))
 
 #define FILL_KPROC(kp, copy_str, p, pr, uc, pg, paddr, \
@@ -574,6 +582,7 @@ do {                                                        
                \
        (kp)->p_jobc = (pg)->pg_jobc;                                   \
                                                                        \
        (kp)->p_estcpu = (p)->p_estcpu;                                 \
+       PR_LOCK(pr);                                                    \
        if (isthread) {                                                 \
                (kp)->p_rtime_sec = (p)->p_tu.tu_runtime.tv_sec;        \
                (kp)->p_rtime_usec = (p)->p_tu.tu_runtime.tv_nsec/1000; \
@@ -589,6 +598,7 @@ do {                                                        
                \
                (kp)->p_sticks = (pr)->ps_tu.tu_sticks;                 \
                (kp)->p_iticks = (pr)->ps_tu.tu_iticks;                 \
        }                                                               \
+       PR_UNLOCK(pr);                                                  \
        (kp)->p_cpticks = (p)->p_cpticks;                               \
                                                                        \
        if (show_addresses)                                             \

Reply via email to