On Thu, Apr 27, 2023 at 02:54:38PM +0200, Claudio Jeker wrote:
> On Thu, Apr 27, 2023 at 01:55:33PM +0300, Vitaliy Makkoveev wrote:
> > Now only direct netlock used for inet sockets protection. The unlocked
> > access to all other sockets is safe, but we could lost consistency for a
> > little. Since the solock() used for sockets protection, make locking
> > path common and use it. Make it shared because this is read-only access
> > to sockets data. For same reason use shared netlock while performing
> > inpcb tables foreach walkthrough.
> 
> This is still wrong. fill_file() is not allowed to sleep. So you can not
> use a rwlock in here. fill_file is called inside a allprocess
> LIST_FOREACH loop and sleeping there will blow up.
>  
> Maybe it is enough to grab a shared NETLOCK around the LIST_FOREACH() to
> ensure that we don't sleep inside.
> 
> Please fix this properly. Right now running fstat is like playing russian
> roulette.
> 

This time fill_file() already has sleep point introduced by direct
netlock invocation. So, this diff doesn't make it worse.

The netlock around allprocess foreach loops is not the properly fix,
because we left non inet sockets unlocked.

However, these allprocess foreach loops are not the only place with
similar problem. Some times ago I did the diff to introduce the
`allprocess_lock' rwlock for `allprocess' list protection.
Unfortunately, this diff was abandoned after some discussions about
proc_stop_sweep() software interrupt handler. I reworked it to kernel
thread to use this new rwlock within. Since proc_stop_sweep() only
schedules parents of stopped processes to run I assume this extra delay
as non significant.

This is the last iteration of `allprocess' diff. I propose to commit it
first to fix sysctl_file() in the right way.


Index: sys/arch/sh/sh/pmap.c
===================================================================
RCS file: /cvs/src/sys/arch/sh/sh/pmap.c,v
retrieving revision 1.30
diff -u -p -r1.30 pmap.c
--- sys/arch/sh/sh/pmap.c       1 Jan 2023 19:49:17 -0000       1.30
+++ sys/arch/sh/sh/pmap.c       9 Jan 2023 09:06:42 -0000
@@ -1071,6 +1071,11 @@ __pmap_asid_alloc(void)
         * so it's far from LRU but rather almost pessimal once you have
         * too many processes.
         */
+       /*
+        * XXX Unlocked access to `allprocess' list is safe. This is
+        * uniprocessor machine, we don't modify the list and we have
+        * no context switch within.
+        */
        LIST_FOREACH(pr, &allprocess, ps_list) {
                pmap_t pmap = pr->ps_vmspace->vm_map.pmap;
 
Index: sys/kern/kern_exit.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_exit.c,v
retrieving revision 1.210
diff -u -p -r1.210 kern_exit.c
--- sys/kern/kern_exit.c        29 Dec 2022 01:36:36 -0000      1.210
+++ sys/kern/kern_exit.c        9 Jan 2023 09:06:42 -0000
@@ -222,6 +222,8 @@ exit1(struct proc *p, int xexit, int xsi
 
        p->p_fd = NULL;         /* zap the thread's copy */
 
+       rw_enter_write(&allprocess_lock);
+
         /*
         * Remove proc from pidhash chain and allproc so looking
         * it up won't work.  We will put the proc on the
@@ -294,6 +296,8 @@ exit1(struct proc *p, int xexit, int xsi
                        process_clear_orphan(qr);
                }
        }
+
+       rw_exit_write(&allprocess_lock);
 
        /* add thread's accumulated rusage into the process's total */
        ruadd(rup, &p->p_ru);
Index: sys/kern/kern_fork.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_fork.c,v
retrieving revision 1.245
diff -u -p -r1.245 kern_fork.c
--- sys/kern/kern_fork.c        7 Jan 2023 05:24:58 -0000       1.245
+++ sys/kern/kern_fork.c        9 Jan 2023 09:06:42 -0000
@@ -62,6 +62,11 @@
 #include <uvm/uvm.h>
 #include <machine/tcb.h>
 
+/*
+ * Locks used to protect struct members in this file:
+ *     P       allprocess_lock
+ */
+
 int    nprocesses = 1;         /* process 0 */
 int    nthreads = 1;           /* proc 0 */
 struct forkstat forkstat;
@@ -372,6 +377,8 @@ fork1(struct proc *curp, int flags, void
                return (ENOMEM);
        }
 
+       rw_enter_write(&allprocess_lock);
+
        /*
         * From now on, we're committed to the fork and cannot fail.
         */
@@ -468,19 +475,28 @@ fork1(struct proc *curp, int flags, void
        KNOTE(&curpr->ps_klist, NOTE_FORK | pr->ps_pid);
 
        /*
-        * Update stats now that we know the fork was successful.
+        * Return child pid to parent process
         */
-       uvmexp.forks++;
-       if (flags & FORK_PPWAIT)
-               uvmexp.forks_ppwait++;
-       if (flags & FORK_SHAREVM)
-               uvmexp.forks_sharevm++;
+       if (retval != NULL)
+               *retval = pr->ps_pid;
 
        /*
         * Pass a pointer to the new process to the caller.
+        * XXX but we don't return `rnewprocp' to sys_fork()
+        * or sys_vfork().
         */
        if (rnewprocp != NULL)
                *rnewprocp = p;
+       rw_exit_write(&allprocess_lock);
+
+       /*
+        * Update stats now that we know the fork was successful.
+        */
+       uvmexp.forks++;
+       if (flags & FORK_PPWAIT)
+               uvmexp.forks_ppwait++;
+       if (flags & FORK_SHAREVM)
+               uvmexp.forks_sharevm++;
 
        /*
         * Preserve synchronization semantics of vfork.  If waiting for
@@ -499,11 +515,6 @@ fork1(struct proc *curp, int flags, void
        if ((flags & FORK_PTRACE) && (curpr->ps_flags & PS_TRACED))
                psignal(curp, SIGTRAP);
 
-       /*
-        * Return child pid to parent process
-        */
-       if (retval != NULL)
-               *retval = pr->ps_pid;
        return (0);
 }
 
@@ -609,7 +620,7 @@ alloctid(void)
 /*
  * Checks for current use of a pid, either as a pid or pgid.
  */
-pid_t oldpids[128];
+pid_t oldpids[128];    /* [P] */
 int
 ispidtaken(pid_t pid)
 {
Index: sys/kern/kern_ktrace.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_ktrace.c,v
retrieving revision 1.109
diff -u -p -r1.109 kern_ktrace.c
--- sys/kern/kern_ktrace.c      5 Dec 2022 23:18:37 -0000       1.109
+++ sys/kern/kern_ktrace.c      9 Jan 2023 09:06:42 -0000
@@ -432,6 +432,7 @@ doktrace(struct vnode *vp, int ops, int 
         * Clear all uses of the tracefile
         */
        if (ops == KTROP_CLEARFILE) {
+               rw_enter_read(&allprocess_lock);
                LIST_FOREACH(pr, &allprocess, ps_list) {
                        if (pr->ps_tracevp == vp) {
                                if (ktrcanset(p, pr))
@@ -440,6 +441,7 @@ doktrace(struct vnode *vp, int ops, int 
                                        error = EPERM;
                        }
                }
+               rw_exit_read(&allprocess_lock);
                goto done;
        }
        /*
@@ -670,12 +672,14 @@ bad:
         */
        log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped\n",
            error);
+       rw_enter_read(&allprocess_lock);
        LIST_FOREACH(pr, &allprocess, ps_list) {
                if (pr == curp->p_p)
                        continue;
                if (pr->ps_tracevp == vp && pr->ps_tracecred == cred)
                        ktrcleartrace(pr);
        }
+       rw_exit_read(&allprocess_lock);
        ktrcleartrace(curp->p_p);
        return (error);
 }
Index: sys/kern/kern_proc.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_proc.c,v
retrieving revision 1.94
diff -u -p -r1.94 kern_proc.c
--- sys/kern/kern_proc.c        2 Jan 2023 23:09:48 -0000       1.94
+++ sys/kern/kern_proc.c        9 Jan 2023 09:06:42 -0000
@@ -46,6 +46,7 @@
 /*
  *  Locks used to protect struct members in this file:
  *     I       immutable after creation
+ *     P       allprocess_lock
  *     U       uidinfolk
  */
 
@@ -54,6 +55,8 @@ struct rwlock uidinfolk;
 LIST_HEAD(uihashhead, uidinfo) *uihashtbl;     /* [U] */
 u_long uihash;                         /* [I] size of hash table - 1 */
 
+struct rwlock allprocess_lock;
+
 /*
  * Other process lists
  */
@@ -63,7 +66,7 @@ struct pidhashhead *pidhashtbl;
 u_long pidhash;
 struct pgrphashhead *pgrphashtbl;
 u_long pgrphash;
-struct processlist allprocess;
+struct processlist allprocess;         /* [P] */
 struct processlist zombprocess;
 struct proclist allproc;
 
@@ -92,6 +95,7 @@ procinit(void)
        LIST_INIT(&zombprocess);
        LIST_INIT(&allproc);
 
+       rw_init(&allprocess_lock, "allpslk");
        rw_init(&uidinfolk, "uidinfo");
 
        tidhashtbl = hashinit(maxthread / 4, M_PROC, M_NOWAIT, &tidhash);
Index: sys/kern/kern_resource.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_resource.c,v
retrieving revision 1.76
diff -u -p -r1.76 kern_resource.c
--- sys/kern/kern_resource.c    17 Nov 2022 18:53:13 -0000      1.76
+++ sys/kern/kern_resource.c    9 Jan 2023 09:06:42 -0000
@@ -122,10 +122,12 @@ sys_getpriority(struct proc *curp, void 
        case PRIO_USER:
                if (SCARG(uap, who) == 0)
                        SCARG(uap, who) = curp->p_ucred->cr_uid;
+               rw_enter_read(&allprocess_lock);
                LIST_FOREACH(pr, &allprocess, ps_list)
                        if (pr->ps_ucred->cr_uid == SCARG(uap, who) &&
                            pr->ps_nice < low)
                                low = pr->ps_nice;
+               rw_exit_read(&allprocess_lock);
                break;
 
        default:
@@ -178,11 +180,13 @@ sys_setpriority(struct proc *curp, void 
        case PRIO_USER:
                if (SCARG(uap, who) == 0)
                        SCARG(uap, who) = curp->p_ucred->cr_uid;
+               rw_enter_read(&allprocess_lock);
                LIST_FOREACH(pr, &allprocess, ps_list)
                        if (pr->ps_ucred->cr_uid == SCARG(uap, who)) {
                                error = donice(curp, pr, SCARG(uap, prio));
                                found = 1;
                        }
+               rw_exit_read(&allprocess_lock);
                break;
 
        default:
Index: sys/kern/kern_sig.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sig.c,v
retrieving revision 1.303
diff -u -p -r1.303 kern_sig.c
--- sys/kern/kern_sig.c 2 Jan 2023 23:09:48 -0000       1.303
+++ sys/kern/kern_sig.c 9 Jan 2023 09:06:42 -0000
@@ -61,6 +61,7 @@
 #include <sys/pledge.h>
 #include <sys/witness.h>
 #include <sys/exec_elf.h>
+#include <sys/kthread.h>
 
 #include <sys/mount.h>
 #include <sys/syscallargs.h>
@@ -128,7 +129,9 @@ void setsigvec(struct proc *, int, struc
 
 void proc_stop(struct proc *p, int);
 void proc_stop_sweep(void *);
-void *proc_stop_si;
+void proc_stop_sweep_create(void *);
+
+int proc_stop_pending = 0;
 
 void setsigctx(struct proc *, int, struct sigctx *);
 void postsig_done(struct proc *, int, sigset_t, int);
@@ -203,11 +206,7 @@ cansignal(struct proc *p, struct process
 void
 signal_init(void)
 {
-       proc_stop_si = softintr_establish(IPL_SOFTCLOCK, proc_stop_sweep,
-           NULL);
-       if (proc_stop_si == NULL)
-               panic("signal_init failed to register softintr");
-
+       kthread_create_deferred(proc_stop_sweep_create, NULL);
        pool_init(&sigacts_pool, sizeof(struct sigacts), 0, IPL_NONE,
            PR_WAITOK, "sigapl", NULL);
 }
@@ -670,6 +669,7 @@ killpg1(struct proc *cp, int signum, int
                /* 
                 * broadcast
                 */
+               rw_enter_read(&allprocess_lock);
                LIST_FOREACH(pr, &allprocess, ps_list) {
                        if (pr->ps_pid <= 1 ||
                            pr->ps_flags & (PS_SYSTEM | PS_NOBROADCASTKILL) ||
@@ -679,6 +679,7 @@ killpg1(struct proc *cp, int signum, int
                        if (signum)
                                prsignal(pr, signum);
                }
+               rw_exit_read(&allprocess_lock);
        } else {
                if (pgid == 0)
                        /*
@@ -1428,18 +1429,16 @@ proc_stop(struct proc *p, int sw)
        atomic_clearbits_int(&pr->ps_flags, PS_WAITED);
        atomic_setbits_int(&pr->ps_flags, PS_STOPPED);
        atomic_setbits_int(&p->p_flag, P_SUSPSIG);
-       /*
-        * We need this soft interrupt to be handled fast.
-        * Extra calls to softclock don't hurt.
-        */
-       softintr_schedule(proc_stop_si);
+
+       proc_stop_pending = 1;
+       wakeup(&proc_stop_pending);
+
        if (sw)
                mi_switch();
 }
 
 /*
- * Called from a soft interrupt to send signals to the parents of stopped
- * processes.
+ * Dedicated thread to send signals to the parents of stopped processes.
  * We can't do this in proc_stop because it's called with nasty locks held
  * and we would need recursive scheduler lock to deal with that.
  */
@@ -1448,15 +1447,36 @@ proc_stop_sweep(void *v)
 {
        struct process *pr;
 
-       LIST_FOREACH(pr, &allprocess, ps_list) {
-               if ((pr->ps_flags & PS_STOPPED) == 0)
-                       continue;
-               atomic_clearbits_int(&pr->ps_flags, PS_STOPPED);
+       while(1) {
+               if (proc_stop_pending == 0)
+                       tsleep_nsec(&proc_stop_pending, PRIO_MAX | PWAIT,
+                           "idle", INFSLP);
+               proc_stop_pending = 0;
 
-               if ((pr->ps_pptr->ps_sigacts->ps_sigflags & SAS_NOCLDSTOP) == 0)
-                       prsignal(pr->ps_pptr, SIGCHLD);
-               wakeup(pr->ps_pptr);
+               rw_enter_read(&allprocess_lock);
+               LIST_FOREACH(pr, &allprocess, ps_list) {
+                       if ((pr->ps_flags & PS_STOPPED) == 0)
+                               continue;
+                       atomic_clearbits_int(&pr->ps_flags, PS_STOPPED);
+
+                       if ((pr->ps_pptr->ps_sigacts->ps_sigflags &
+                           SAS_NOCLDSTOP) == 0)
+                               prsignal(pr->ps_pptr, SIGCHLD);
+                       wakeup(pr->ps_pptr);
+               }
+               rw_exit_read(&allprocess_lock);
        }
+}
+
+/*
+ * Deferred proc_stop_sweep() task creator.
+ */
+void
+proc_stop_sweep_create(void *v)
+{
+       if (kthread_create(proc_stop_sweep, NULL, NULL,
+           "proc_stop_sweep"))
+               panic("signal_init failed to create kthread");
 }
 
 /*
Index: sys/kern/kern_sysctl.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sysctl.c,v
retrieving revision 1.408
diff -u -p -r1.408 kern_sysctl.c
--- sys/kern/kern_sysctl.c      7 Nov 2022 14:25:44 -0000       1.408
+++ sys/kern/kern_sysctl.c      9 Jan 2023 09:06:42 -0000
@@ -1418,6 +1418,7 @@ sysctl_file(int *name, u_int namelen, ch
                        break;
                }
                matched = 0;
+               rw_enter_read(&allprocess_lock);
                LIST_FOREACH(pr, &allprocess, ps_list) {
                        /*
                         * skip system, exiting, embryonic and undead
@@ -1446,10 +1447,12 @@ sysctl_file(int *name, u_int namelen, ch
                                FRELE(fp, p);
                        }
                }
+               rw_exit_read(&allprocess_lock);
                if (!matched)
                        error = ESRCH;
                break;
        case KERN_FILE_BYUID:
+               rw_enter_read(&allprocess_lock);
                LIST_FOREACH(pr, &allprocess, ps_list) {
                        /*
                         * skip system, exiting, embryonic and undead
@@ -1475,6 +1478,7 @@ sysctl_file(int *name, u_int namelen, ch
                                FRELE(fp, p);
                        }
                }
+               rw_exit_read(&allprocess_lock);
                break;
        default:
                error = EINVAL;
@@ -1530,8 +1534,9 @@ sysctl_doproc(int *name, u_int namelen, 
        if (where != NULL)
                kproc = malloc(sizeof(*kproc), M_TEMP, M_WAITOK);
 
-       pr = LIST_FIRST(&allprocess);
        doingzomb = 0;
+       rw_enter_read(&allprocess_lock);
+       pr = LIST_FIRST(&allprocess);
 again:
        for (; pr != NULL; pr = LIST_NEXT(pr, ps_list)) {
                /* XXX skip processes in the middle of being zapped */
@@ -1594,6 +1599,7 @@ again:
                        break;
 
                default:
+                       rw_exit_read(&allprocess_lock);
                        error = EINVAL;
                        goto err;
                }
@@ -1601,8 +1607,10 @@ again:
                if (buflen >= elem_size && elem_count > 0) {
                        fill_kproc(pr, kproc, NULL, show_pointers);
                        error = copyout(kproc, dp, elem_size);
-                       if (error)
+                       if (error) {
+                               rw_exit_read(&allprocess_lock);
                                goto err;
+                       }
                        dp += elem_size;
                        buflen -= elem_size;
                        elem_count--;
@@ -1617,8 +1625,10 @@ again:
                        if (buflen >= elem_size && elem_count > 0) {
                                fill_kproc(pr, kproc, p, show_pointers);
                                error = copyout(kproc, dp, elem_size);
-                               if (error)
+                               if (error) {
+                                       rw_exit_read(&allprocess_lock);
                                        goto err;
+                               }
                                dp += elem_size;
                                buflen -= elem_size;
                                elem_count--;
@@ -1631,6 +1641,8 @@ again:
                doingzomb++;
                goto again;
        }
+       rw_exit_read(&allprocess_lock);
+
        if (where != NULL) {
                *sizep = dp - where;
                if (needed > *sizep) {
Index: sys/kern/kern_unveil.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_unveil.c,v
retrieving revision 1.55
diff -u -p -r1.55 kern_unveil.c
--- sys/kern/kern_unveil.c      5 Dec 2022 23:18:37 -0000       1.55
+++ sys/kern/kern_unveil.c      9 Jan 2023 09:06:42 -0000
@@ -823,6 +823,7 @@ unveil_removevnode(struct vnode *vp)
 
        vref(vp); /* make sure it is held till we are done */
 
+       rw_assert_rdlock(&allprocess_lock);
        LIST_FOREACH(pr, &allprocess, ps_list) {
                struct unveil * uv;
 
Index: sys/kern/vfs_syscalls.c
===================================================================
RCS file: /cvs/src/sys/kern/vfs_syscalls.c,v
retrieving revision 1.360
diff -u -p -r1.360 vfs_syscalls.c
--- sys/kern/vfs_syscalls.c     14 Aug 2022 01:58:28 -0000      1.360
+++ sys/kern/vfs_syscalls.c     9 Jan 2023 09:06:42 -0000
@@ -325,6 +325,7 @@ checkdirs(struct vnode *olddp)
                return;
        if (VFS_ROOT(olddp->v_mountedhere, &newdp))
                panic("mount: lost mount");
+       rw_enter_read(&allprocess_lock);
        LIST_FOREACH(pr, &allprocess, ps_list) {
                fdp = pr->ps_fd;
                if (fdp->fd_cdir == olddp) {
@@ -338,6 +339,7 @@ checkdirs(struct vnode *olddp)
                        fdp->fd_rdir = newdp;
                }
        }
+       rw_exit_read(&allprocess_lock);
        if (rootvnode == olddp) {
                free_count++;
                vref(newdp);
@@ -483,12 +485,20 @@ dounmount_leaf(struct mount *mp, int fla
        }
 
        /*
+        * Protect `allprocess' list access within
+        * unveil_removevnode(). Do this here to avoid
+        * context switch within `mnt_vnodelist' foreach
+        * loop.
+        */
+       rw_enter_read(&allprocess_lock);
+       /*
         * Before calling file system unmount, make sure
         * all unveils to vnodes in here are dropped.
         */
        TAILQ_FOREACH_SAFE(vp , &mp->mnt_vnodelist, v_mntvnodes, nvp) {
                unveil_removevnode(vp);
        }
+       rw_exit_read(&allprocess_lock);
 
        if (((mp->mnt_flag & MNT_RDONLY) ||
            (error = VFS_SYNC(mp, MNT_WAIT, 0, p->p_ucred, p)) == 0) ||
Index: sys/sys/proc.h
===================================================================
RCS file: /cvs/src/sys/sys/proc.h,v
retrieving revision 1.338
diff -u -p -r1.338 proc.h
--- sys/sys/proc.h      7 Jan 2023 05:24:58 -0000       1.338
+++ sys/sys/proc.h      9 Jan 2023 09:06:42 -0000
@@ -124,6 +124,7 @@ struct unveil;
  *     K       kernel lock
  *     m       this process' `ps_mtx'
  *     p       this process' `ps_lock'
+ *     P       allprocess_lock
  *     R       rlimit_lock
  *     S       scheduler lock
  *     T       itimer_mtx
@@ -137,7 +138,7 @@ struct process {
        struct  proc *ps_mainproc;
        struct  ucred *ps_ucred;        /* Process owner's identity. */
 
-       LIST_ENTRY(process) ps_list;    /* List of all processes. */
+       LIST_ENTRY(process) ps_list;    /* [P] List of all processes. */
        TAILQ_HEAD(,proc) ps_threads;   /* [K|S] Threads in this process. */
 
        LIST_ENTRY(process) ps_pglist;  /* List of processes in pgrp. */
@@ -501,6 +502,8 @@ extern struct proc proc0;           /* Process sl
 extern struct process process0;                /* Process slot for kernel 
threads. */
 extern int nprocesses, maxprocess;     /* Cur and max number of processes. */
 extern int nthreads, maxthread;                /* Cur and max number of 
threads. */
+
+extern struct rwlock allprocess_lock;
 
 LIST_HEAD(proclist, proc);
 LIST_HEAD(processlist, process);

Reply via email to