Every multi-threaded process keeps a list of threads in `ps_threads'.
This list is iterated in both interrupt and process context, which
makes it complicated to protect with a rwlock.

One of the places where such iteration is done is inside the tsleep(9)
routines, directly in single_thread_check() or via CURSIG().  In order
to take this code path out of the KERNEL_LOCK(), claudio@ proposed to
use SMR_TAILQ.  This has the advantage of not introducing lock
dependencies and allows us to address every iteration one by one.
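
For reference, here is a rough sketch of what a later, lock-free reader
could look like once the list is published through the SMR macros.  It
is only an illustration and not part of the diff below; the helper name
is made up and it mirrors the priority scan done in sys_sched_yield():

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/smr.h>

/*
 * Hypothetical lock-free walk of ps_threads.  It runs inside an SMR
 * read-side critical section, so it must not sleep, and freeing a
 * struct proc would first have to wait for an SMR grace period.
 */
u_char
thread_max_runpri(struct process *pr)
{
        struct proc *q;
        u_char newprio = 0;

        smr_read_enter();
        SMR_TAILQ_FOREACH(q, &pr->ps_threads, p_thr_link)
                newprio = max(newprio, q->p_runpri);
        smr_read_leave();

        return (newprio);
}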

The diff below is a first step in this direction: it replaces the
existing TAILQ_* macros with the locked versions of the SMR_TAILQ_*
macros.  This is mostly lifted from claudio@'s diff and should not
introduce any side effects.
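
For clarity, the writer side after this diff keeps running under the
KERNEL_LOCK() and only switches to the _LOCKED variants of the macros.
A minimal sketch with made-up helper names (in the diff the calls sit
directly in process_initialize(), thread_fork() and exit1()):

#include <sys/proc.h>
#include <sys/smr.h>

void
thread_link(struct process *pr, struct proc *p)
{
        /* Still serialized by the kernel lock, like the old TAILQ code. */
        SMR_TAILQ_INSERT_TAIL_LOCKED(&pr->ps_threads, p, p_thr_link);
}

void
thread_unlink(struct process *pr, struct proc *p)
{
        SMR_TAILQ_REMOVE_LOCKED(&pr->ps_threads, p, p_thr_link);
        /*
         * Once readers iterate lock-free, freeing `p' will also have
         * to wait for an SMR grace period, e.g. via smr_barrier();
         * nothing in this first step changes that.
         */
}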

ok?

diff --git lib/libkvm/kvm_proc2.c lib/libkvm/kvm_proc2.c
index 96f7dc91b92..1f4f9b914bb 100644
--- lib/libkvm/kvm_proc2.c
+++ lib/libkvm/kvm_proc2.c
@@ -341,8 +341,9 @@ kvm_proclist(kvm_t *kd, int op, int arg, struct process *pr,
                        kp.p_pctcpu = 0;
                        kp.p_stat = (process.ps_flags & PS_ZOMBIE) ? SDEAD :
                            SIDL;
-                       for (p = TAILQ_FIRST(&process.ps_threads); p != NULL; 
-                           p = TAILQ_NEXT(&proc, p_thr_link)) {
+                       for (p = SMR_TAILQ_FIRST_LOCKED(&process.ps_threads);
+                           p != NULL;
+                           p = SMR_TAILQ_NEXT_LOCKED(&proc, p_thr_link)) {
                                if (KREAD(kd, (u_long)p, &proc)) {
                                        _kvm_err(kd, kd->program,
                                            "can't read proc at %lx",
@@ -376,8 +377,8 @@ kvm_proclist(kvm_t *kd, int op, int arg, struct process *pr,
                if (!dothreads)
                        continue;
 
-               for (p = TAILQ_FIRST(&process.ps_threads); p != NULL; 
-                   p = TAILQ_NEXT(&proc, p_thr_link)) {
+               for (p = SMR_TAILQ_FIRST_LOCKED(&process.ps_threads); p != NULL;
+                   p = SMR_TAILQ_NEXT_LOCKED(&proc, p_thr_link)) {
                        if (KREAD(kd, (u_long)p, &proc)) {
                                _kvm_err(kd, kd->program,
                                    "can't read proc at %lx",
diff --git sys/kern/exec_elf.c sys/kern/exec_elf.c
index 5e455208663..575273b306c 100644
--- sys/kern/exec_elf.c
+++ sys/kern/exec_elf.c
@@ -85,6 +85,7 @@
 #include <sys/signalvar.h>
 #include <sys/stat.h>
 #include <sys/pledge.h>
+#include <sys/smr.h>
 
 #include <sys/mman.h>
 
@@ -1360,7 +1361,7 @@ coredump_notes_elf(struct proc *p, void *iocookie, size_t *sizep)
         * threads in the process have been stopped and the list can't
         * change.
         */
-       TAILQ_FOREACH(q, &pr->ps_threads, p_thr_link) {
+       SMR_TAILQ_FOREACH_LOCKED(q, &pr->ps_threads, p_thr_link) {
                if (q == p)             /* we've taken care of this thread */
                        continue;
                error = coredump_note_elf(q, iocookie, &notesize);
diff --git sys/kern/init_main.c sys/kern/init_main.c
index fed6be19435..2b657ffe328 100644
--- sys/kern/init_main.c
+++ sys/kern/init_main.c
@@ -519,7 +519,7 @@ main(void *framep)
         */
        LIST_FOREACH(pr, &allprocess, ps_list) {
                nanouptime(&pr->ps_start);
-               TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link) {
+               SMR_TAILQ_FOREACH_LOCKED(p, &pr->ps_threads, p_thr_link) {
                        nanouptime(&p->p_cpu->ci_schedstate.spc_runtime);
                        timespecclear(&p->p_rtime);
                }
diff --git sys/kern/kern_exit.c sys/kern/kern_exit.c
index a20775419e3..ffc0158954c 100644
--- sys/kern/kern_exit.c
+++ sys/kern/kern_exit.c
@@ -63,6 +63,7 @@
 #ifdef SYSVSEM
 #include <sys/sem.h>
 #endif
+#include <sys/smr.h>
 #include <sys/witness.h>
 
 #include <sys/mount.h>
@@ -161,7 +162,7 @@ exit1(struct proc *p, int xexit, int xsig, int flags)
        }
 
        /* unlink ourselves from the active threads */
-       TAILQ_REMOVE(&pr->ps_threads, p, p_thr_link);
+       SMR_TAILQ_REMOVE_LOCKED(&pr->ps_threads, p, p_thr_link);
        if ((p->p_flag & P_THREAD) == 0) {
                /* main thread gotta wait because it has the pid, et al */
                while (pr->ps_refcnt > 1)
@@ -724,7 +725,7 @@ process_zap(struct process *pr)
        if (pr->ps_ptstat != NULL)
                free(pr->ps_ptstat, M_SUBPROC, sizeof(*pr->ps_ptstat));
        pool_put(&rusage_pool, pr->ps_ru);
-       KASSERT(TAILQ_EMPTY(&pr->ps_threads));
+       KASSERT(SMR_TAILQ_EMPTY_LOCKED(&pr->ps_threads));
        lim_free(pr->ps_limit);
        crfree(pr->ps_ucred);
        pool_put(&process_pool, pr);
diff --git sys/kern/kern_fork.c sys/kern/kern_fork.c
index 9fb239bc8b4..e1cb587b2b8 100644
--- sys/kern/kern_fork.c
+++ sys/kern/kern_fork.c
@@ -52,6 +52,7 @@
 #include <sys/acct.h>
 #include <sys/ktrace.h>
 #include <sys/sched.h>
+#include <sys/smr.h>
 #include <sys/sysctl.h>
 #include <sys/pool.h>
 #include <sys/mman.h>
@@ -179,8 +180,8 @@ process_initialize(struct process *pr, struct proc *p)
 {
        /* initialize the thread links */
        pr->ps_mainproc = p;
-       TAILQ_INIT(&pr->ps_threads);
-       TAILQ_INSERT_TAIL(&pr->ps_threads, p, p_thr_link);
+       SMR_TAILQ_INIT(&pr->ps_threads);
+       SMR_TAILQ_INSERT_TAIL_LOCKED(&pr->ps_threads, p, p_thr_link);
        pr->ps_refcnt = 1;
        p->p_p = pr;
 
@@ -557,7 +558,7 @@ thread_fork(struct proc *curp, void *stack, void *tcb, pid_t *tidptr,
 
        LIST_INSERT_HEAD(&allproc, p, p_list);
        LIST_INSERT_HEAD(TIDHASH(p->p_tid), p, p_hash);
-       TAILQ_INSERT_TAIL(&pr->ps_threads, p, p_thr_link);
+       SMR_TAILQ_INSERT_TAIL_LOCKED(&pr->ps_threads, p, p_thr_link);
 
        /*
         * if somebody else wants to take us to single threaded mode,
diff --git sys/kern/kern_proc.c sys/kern/kern_proc.c
index 70b781e53cb..a2acc741834 100644
--- sys/kern/kern_proc.c
+++ sys/kern/kern_proc.c
@@ -502,7 +502,7 @@ db_kill_cmd(db_expr_t addr, int have_addr, db_expr_t count, char *modif)
                return;
        }
 
-       p = TAILQ_FIRST(&pr->ps_threads);
+       p = SMR_TAILQ_FIRST_LOCKED(&pr->ps_threads);
 
        /* Send uncatchable SIGABRT for coredump */
        sigabort(p);
@@ -558,7 +558,7 @@ db_show_all_procs(db_expr_t addr, int haddr, db_expr_t count, char *modif)
        while (pr != NULL) {
                ppr = pr->ps_pptr;
 
-               TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link) {
+               SMR_TAILQ_FOREACH_LOCKED(p, &pr->ps_threads, p_thr_link) {
 #ifdef MULTIPROCESSOR
                        if (__mp_lock_held(&kernel_lock, p->p_cpu))
                                has_kernel_lock = 1;
diff --git sys/kern/kern_resource.c sys/kern/kern_resource.c
index 0111bfa733a..86e855eda43 100644
--- sys/kern/kern_resource.c
+++ sys/kern/kern_resource.c
@@ -211,11 +211,11 @@ donice(struct proc *curp, struct process *chgpr, int n)
        if (n < chgpr->ps_nice && suser(curp))
                return (EACCES);
        chgpr->ps_nice = n;
-       SCHED_LOCK(s);
-       TAILQ_FOREACH(p, &chgpr->ps_threads, p_thr_link) {
+       SMR_TAILQ_FOREACH_LOCKED(p, &chgpr->ps_threads, p_thr_link) {
+               SCHED_LOCK(s);
                setpriority(p, p->p_estcpu, n);
+               SCHED_UNLOCK(s);
        }
-       SCHED_UNLOCK(s);
        return (0);
 }
 
@@ -488,7 +488,7 @@ dogetrusage(struct proc *p, int who, struct rusage *rup)
                        memset(rup, 0, sizeof(*rup));
 
                /* add on all living threads */
-               TAILQ_FOREACH(q, &pr->ps_threads, p_thr_link) {
+               SMR_TAILQ_FOREACH_LOCKED(q, &pr->ps_threads, p_thr_link) {
                        ruadd(rup, &q->p_ru);
                        tuagg(pr, q);
                }
diff --git sys/kern/kern_sig.c sys/kern/kern_sig.c
index 0cd78b46de5..269494bd142 100644
--- sys/kern/kern_sig.c
+++ sys/kern/kern_sig.c
@@ -925,7 +925,7 @@ ptsignal(struct proc *p, int signum, enum signal_type type)
                         * delayed.  Otherwise, mark it pending on the
                         * main thread.
                         */
-                       TAILQ_FOREACH(q, &pr->ps_threads, p_thr_link) {
+                       SMR_TAILQ_FOREACH_LOCKED(q, &pr->ps_threads, p_thr_link) {
                                /* ignore exiting threads */
                                if (q->p_flag & P_WEXIT)
                                        continue;
@@ -1009,7 +1009,7 @@ ptsignal(struct proc *p, int signum, enum signal_type type)
         * XXX delay processing of SA_STOP signals unless action == SIG_DFL?
         */
        if (prop & (SA_CONT | SA_STOP) && type != SPROPAGATED)
-               TAILQ_FOREACH(q, &pr->ps_threads, p_thr_link)
+               SMR_TAILQ_FOREACH_LOCKED(q, &pr->ps_threads, p_thr_link)
                        if (q != p)
                                ptsignal(q, signum, SPROPAGATED);
 
@@ -2026,7 +2026,7 @@ single_thread_set(struct proc *p, enum single_thread_mode mode, int deep)
        pr->ps_singlecount = 0;
        membar_producer();
        pr->ps_single = p;
-       TAILQ_FOREACH(q, &pr->ps_threads, p_thr_link) {
+       SMR_TAILQ_FOREACH_LOCKED(q, &pr->ps_threads, p_thr_link) {
                if (q == p)
                        continue;
                if (q->p_flag & P_WEXIT) {
@@ -2118,7 +2118,7 @@ single_thread_clear(struct proc *p, int flag)
        SCHED_LOCK(s);
        pr->ps_single = NULL;
        atomic_clearbits_int(&pr->ps_flags, PS_SINGLEUNWIND | PS_SINGLEEXIT);
-       TAILQ_FOREACH(q, &pr->ps_threads, p_thr_link) {
+       SMR_TAILQ_FOREACH_LOCKED(q, &pr->ps_threads, p_thr_link) {
                if (q == p || (q->p_flag & P_SUSPSINGLE) == 0)
                        continue;
                atomic_clearbits_int(&q->p_flag, P_SUSPSINGLE);
diff --git sys/kern/kern_synch.c sys/kern/kern_synch.c
index f0338e8f90f..9ef710acf4d 100644
--- sys/kern/kern_synch.c
+++ sys/kern/kern_synch.c
@@ -50,6 +50,7 @@
 #include <sys/pool.h>
 #include <sys/refcnt.h>
 #include <sys/atomic.h>
+#include <sys/smr.h>
 #include <sys/witness.h>
 #include <sys/tracepoint.h>
 
@@ -633,7 +634,7 @@ sys_sched_yield(struct proc *p, void *v, register_t *retval)
         * can make some progress.
         */
        newprio = p->p_usrpri;
-       TAILQ_FOREACH(q, &p->p_p->ps_threads, p_thr_link)
+       SMR_TAILQ_FOREACH_LOCKED(q, &p->p_p->ps_threads, p_thr_link)
                newprio = max(newprio, q->p_runpri);
        setrunqueue(p->p_cpu, p, newprio);
        p->p_ru.ru_nvcsw++;
diff --git sys/kern/kern_sysctl.c sys/kern/kern_sysctl.c
index a275fa7b6a9..da4fe6a22cf 100644
--- sys/kern/kern_sysctl.c
+++ sys/kern/kern_sysctl.c
@@ -1596,7 +1596,7 @@ again:
                if (!dothreads)
                        continue;
 
-               TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link) {
+               SMR_TAILQ_FOREACH_LOCKED(p, &pr->ps_threads, p_thr_link) {
                        if (buflen >= elem_size && elem_count > 0) {
                                fill_kproc(pr, kproc, p, show_pointers);
                                error = copyout(kproc, dp, elem_size);
@@ -1696,7 +1696,7 @@ fill_kproc(struct process *pr, struct kinfo_proc *ki, struct proc *p,
        } else {
                ki->p_pctcpu = 0;
                ki->p_stat = (pr->ps_flags & PS_ZOMBIE) ? SDEAD : SIDL;
-               TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link) {
+               SMR_TAILQ_FOREACH_LOCKED(p, &pr->ps_threads, p_thr_link) {
                        ki->p_pctcpu += p->p_pctcpu;
                        /* find best state: ONPROC > RUN > STOP > SLEEP > .. */
                        if (p->p_stat == SONPROC || ki->p_stat == SONPROC)
diff --git sys/kern/subr_witness.c sys/kern/subr_witness.c
index 6ee1619da2b..8e44bd930bc 100644
--- sys/kern/subr_witness.c
+++ sys/kern/subr_witness.c
@@ -1888,7 +1888,7 @@ witness_process_has_locks(struct process *pr)
 {
        struct proc *p;
 
-       TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link) {
+       SMR_TAILQ_FOREACH_LOCKED(p, &pr->ps_threads, p_thr_link) {
                if (witness_thread_has_locks(p))
                        return (1);
        }
@@ -2108,7 +2108,7 @@ db_witness_list_all(db_expr_t addr, int have_addr, db_expr_t count, char *modif)
        LIST_FOREACH(pr, &allprocess, ps_list) {
                if (!witness_process_has_locks(pr))
                        continue;
-               TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link) {
+               SMR_TAILQ_FOREACH_LOCKED(p, &pr->ps_threads, p_thr_link) {
                        if (!witness_thread_has_locks(p))
                                continue;
                        db_printf("Process %d (%s) thread %p (%d)\n",
diff --git sys/kern/sys_process.c sys/kern/sys_process.c
index 4f6bc30cd76..1ddf711b2a6 100644
--- sys/kern/sys_process.c
+++ sys/kern/sys_process.c
@@ -301,7 +301,7 @@ ptrace_ctrl(struct proc *p, int req, pid_t pid, caddr_t addr, int data)
                        error = ESRCH;
                        goto fail;
                }
-               t = TAILQ_FIRST(&tr->ps_threads);
+               t = SMR_TAILQ_FIRST_LOCKED(&tr->ps_threads);
                break;
 
        /* calls that accept a PID or a thread ID */
@@ -561,9 +561,9 @@ ptrace_kstate(struct proc *p, int req, pid_t pid, void *addr)
                                return ESRCH;
                        if (t->p_p != tr)
                                return EINVAL;
-                       t = TAILQ_NEXT(t, p_thr_link);
+                       t = SMR_TAILQ_NEXT_LOCKED(t, p_thr_link);
                } else {
-                       t = TAILQ_FIRST(&tr->ps_threads);
+                       t = SMR_TAILQ_FIRST_LOCKED(&tr->ps_threads);
                }
 
                if (t == NULL)
@@ -754,7 +754,7 @@ process_tprfind(pid_t tpid, struct proc **tp)
 
                if (tr == NULL)
                        return NULL;
-               *tp = TAILQ_FIRST(&tr->ps_threads);
+               *tp = SMR_TAILQ_FIRST_LOCKED(&tr->ps_threads);
                return tr;
        }
 }
diff --git sys/kern/tty.c sys/kern/tty.c
index efd8bbf5cb9..d1bbfa08540 100644
--- sys/kern/tty.c
+++ sys/kern/tty.c
@@ -2118,7 +2118,7 @@ process_sum(struct process *pr, fixpt_t *estcpup)
 
        ret = 0;
        estcpu = 0;
-       TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link) {
+       SMR_TAILQ_FOREACH_LOCKED(p, &pr->ps_threads, p_thr_link) {
                if (p->p_stat == SRUN || p->p_stat == SONPROC)
                        ret = 1;
                estcpu += p->p_pctcpu;
@@ -2220,12 +2220,12 @@ update_pickpr:
                 *  - prefer living
                 * Otherwise take the newest thread
                 */
-               pick = p = TAILQ_FIRST(&pickpr->ps_threads);
+               pick = p = SMR_TAILQ_FIRST_LOCKED(&pickpr->ps_threads);
                if (p == NULL)
                        goto empty;
                run = p->p_stat == SRUN || p->p_stat == SONPROC;
                pctcpu = p->p_pctcpu;
-               while ((p = TAILQ_NEXT(p, p_thr_link)) != NULL) {
+               while ((p = SMR_TAILQ_NEXT_LOCKED(p, p_thr_link)) != NULL) {
                        run2 = p->p_stat == SRUN || p->p_stat == SONPROC;
                        pctcpu2 = p->p_pctcpu;
                        if (run) {
diff --git sys/sys/proc.h sys/sys/proc.h
index d4ec6450a7a..196e48ce9a6 100644
--- sys/sys/proc.h
+++ sys/sys/proc.h
@@ -50,6 +50,7 @@
 #include <sys/resource.h>              /* For struct rusage */
 #include <sys/rwlock.h>                        /* For struct rwlock */
 #include <sys/sigio.h>                 /* For struct sigio */
+#include <sys/smr.h>
 
 #ifdef _KERNEL
 #include <sys/atomic.h>
@@ -156,6 +157,9 @@ struct unveil;
  *     R       rlimit_lock
  *     S       scheduler lock
  *     T       itimer_mtx
+ *
+ *  For SMR related structures that allow lock-free reads, the write lock
+ *  is indicated below.
  */
 struct process {
        /*
@@ -167,7 +171,7 @@ struct process {
        struct  ucred *ps_ucred;        /* Process owner's identity. */
 
        LIST_ENTRY(process) ps_list;    /* List of all processes. */
-       TAILQ_HEAD(,proc) ps_threads;   /* Threads in this process. */
+       SMR_TAILQ_HEAD(,proc) ps_threads; /* [K] Threads in this process. */
 
        LIST_ENTRY(process) ps_pglist;  /* List of processes in pgrp. */
        struct  process *ps_pptr;       /* Pointer to parent process. */
@@ -330,16 +334,20 @@ struct p_inentry {
 /*
  *  Locks used to protect struct members in this file:
  *     I       immutable after creation
- *     S       scheduler lock
+ *     K       kernel lock
  *     l       read only reference, see lim_read_enter()
  *     o       owned (read/modified only) by this thread
+ *     S       scheduler lock
+ *
+ *  For SMR related structures that allow lock-free reads, the write lock
+ *  is indicated below.
  */
 struct proc {
        TAILQ_ENTRY(proc) p_runq;       /* [S] current run/sleep queue */
        LIST_ENTRY(proc) p_list;        /* List of all threads. */
 
        struct  process *p_p;           /* [I] The process of this thread. */
-       TAILQ_ENTRY(proc) p_thr_link;   /* Threads in a process linkage. */
+       SMR_TAILQ_ENTRY(proc) p_thr_link; /* [K] Threads in a process linkage */
 
        TAILQ_ENTRY(proc) p_fut_link;   /* Threads in a futex linkage. */
        struct  futex   *p_futex;       /* Current sleeping futex. */
@@ -425,8 +433,9 @@ struct proc {
 #define        SONPROC 7               /* Thread is currently on a CPU. */
 
 #define        P_ZOMBIE(p)     ((p)->p_stat == SDEAD)
-#define        P_HASSIBLING(p) (TAILQ_FIRST(&(p)->p_p->ps_threads) != (p) || \
-                        TAILQ_NEXT((p), p_thr_link) != NULL)
+#define        P_HASSIBLING(p)                                                 \
+       (SMR_TAILQ_FIRST_LOCKED(&(p)->p_p->ps_threads) != (p) ||        \
+        SMR_TAILQ_NEXT_LOCKED((p), p_thr_link) != NULL)
 
 /*
  * These flags are per-thread and kept in p_flag
diff --git sys/uvm/uvm_glue.c sys/uvm/uvm_glue.c
index 390307c4c81..40a10e4c1c5 100644
--- sys/uvm/uvm_glue.c
+++ sys/uvm/uvm_glue.c
@@ -369,7 +369,7 @@ uvm_swapout_threads(void)
                 * the smallest p_slptime
                 */
                slpp = NULL;
-               TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link) {
+               SMR_TAILQ_FOREACH_LOCKED(p, &pr->ps_threads, p_thr_link) {
                        switch (p->p_stat) {
                        case SRUN:
                        case SONPROC:
