Diff below fixes a race in signal handling that can be triggered when
a multi-threaded process installs a signal handler with SA_RESETHAND.

I described a case of this problem on bugs@ a month ago:
  https://marc.info/?l=openbsd-bugs&m=151672599508049&w=2

The race occurs when one of the threads is doing a NOLOCK syscall, like
futex(2):

CPU0:
  Before returning to userland this thread will call CURSIG() without
  KERNEL_LOCK(), issignal() will look at `ps_sigact', see that there's
  a handler and return the signal number.  After that the thread will
  call postsig() and spin on the KERNEL_LOCK().

CPU1:
  In the meantime another thread, with the KERNEL_LOCK(), also looked at
  `ps_sigact' and is already processing the signal.  Since the handler
  has been installed with SA_RESETHAND it will reset the action to
  SIG_DFL then release the KERNEL_LOCK().

CPU0:
  After grabbing the KERNEL_LOCK() inside postsig(), the thread tries
  to process an already processed signal, see that the actions is
  SIG_DFL and call sigexit() while all other threads are SSTOP'ed.


Diff below fixes that by serializing changes to `ps_sigact' via the
KERNEL_LOCK().  I also introduce postsig_done(), gypped from FreeBSD,
to assert that the lock is held and to prepare for finer grained
locking.

Tests, oks?

Index: kern/kern_sig.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sig.c,v
retrieving revision 1.215
diff -u -p -r1.215 kern_sig.c
--- kern/kern_sig.c     19 Feb 2018 08:59:52 -0000      1.215
+++ kern/kern_sig.c     21 Feb 2018 13:09:54 -0000
@@ -82,6 +82,7 @@ void proc_stop(struct proc *p, int);
 void proc_stop_sweep(void *);
 struct timeout proc_stop_to;
 
+void postsig(struct proc *, int);
 int cansignal(struct proc *, struct process *, int);
 
 struct pool sigacts_pool;      /* memory pool for sigacts structures */
@@ -747,6 +748,27 @@ pgsignal(struct pgrp *pgrp, int signum, 
 }
 
 /*
+ * Recalculate the signal mask and reset the signal disposition after
+ * usermode frame for delivery is formed.
+ */
+void
+postsig_done(struct proc *p, int signum, struct sigacts *ps)
+{
+       int mask = sigmask(signum);
+
+       KERNEL_ASSERT_LOCKED();
+
+       p->p_ru.ru_nsignals++;
+       atomic_setbits_int(&p->p_sigmask, ps->ps_catchmask[signum]);
+       if ((ps->ps_sigreset & mask) != 0) {
+               ps->ps_sigcatch &= ~mask;
+               if (signum != SIGCONT && sigprop[signum] & SA_IGNORE)
+                       ps->ps_sigignore |= mask;
+               ps->ps_sigact[signum] = SIG_DFL;
+       }
+}
+
+/*
  * Send a signal caused by a trap to the current thread
  * If it will be caught immediately, deliver it with correct code.
  * Otherwise, post it normally.
@@ -780,16 +802,9 @@ trapsignal(struct proc *p, int signum, u
                            p->p_sigmask, code, &si);
                }
 #endif
-               p->p_ru.ru_nsignals++;
                (*pr->ps_emul->e_sendsig)(ps->ps_sigact[signum], signum,
                    p->p_sigmask, trapno, code, sigval);
-               atomic_setbits_int(&p->p_sigmask, ps->ps_catchmask[signum]);
-               if ((ps->ps_sigreset & mask) != 0) {
-                       ps->ps_sigcatch &= ~mask;
-                       if (signum != SIGCONT && sigprop[signum] & SA_IGNORE)
-                               ps->ps_sigignore |= mask;
-                       ps->ps_sigact[signum] = SIG_DFL;
-               }
+               postsig_done(p, signum, ps);
        } else {
                p->p_sisig = signum;
                p->p_sitrapno = trapno; /* XXX for core dump/debugger */
@@ -1330,9 +1345,8 @@ proc_stop_sweep(void *v)
  * from the current set of pending signals.
  */
 void
-postsig(int signum)
+postsig(struct proc *p, int signum)
 {
-       struct proc *p = curproc;
        struct process *pr = p->p_p;
        struct sigacts *ps = pr->ps_sigacts;
        sig_t action;
@@ -1341,12 +1355,8 @@ postsig(int signum)
        union sigval sigval;
        int s, code;
 
-#ifdef DIAGNOSTIC
-       if (signum == 0)
-               panic("postsig");
-#endif
-
-       KERNEL_LOCK();
+       KASSERT(signum != 0);
+       KERNEL_ASSERT_LOCKED();
 
        mask = sigmask(signum);
        atomic_clearbits_int(&p->p_siglist, mask);
@@ -1366,7 +1376,7 @@ postsig(int signum)
 #ifdef KTRACE
        if (KTRPOINT(p, KTR_PSIG)) {
                siginfo_t si;
-               
+
                initsiginfo(&si, signum, trapno, code, sigval);
                ktrpsig(p, signum, action, p->p_flag & P_SIGSUSPEND ?
                    p->p_oldmask : p->p_sigmask, code, &si);
@@ -1407,15 +1417,6 @@ postsig(int signum)
                } else {
                        returnmask = p->p_sigmask;
                }
-               atomic_setbits_int(&p->p_sigmask, ps->ps_catchmask[signum]);
-               if ((ps->ps_sigreset & mask) != 0) {
-                       ps->ps_sigcatch &= ~mask;
-                       if (signum != SIGCONT && sigprop[signum] & SA_IGNORE)
-                               ps->ps_sigignore |= mask;
-                       ps->ps_sigact[signum] = SIG_DFL;
-               }
-               splx(s);
-               p->p_ru.ru_nsignals++;
                if (p->p_sisig == signum) {
                        p->p_sisig = 0;
                        p->p_sitrapno = 0;
@@ -1425,9 +1426,9 @@ postsig(int signum)
 
                (*pr->ps_emul->e_sendsig)(action, signum, returnmask, trapno,
                    code, sigval);
+               postsig_done(p, signum, ps);
+               splx(s);
        }
-
-       KERNEL_UNLOCK();
 }
 
 /*
@@ -1816,7 +1817,7 @@ filt_signal(struct knote *kn, long hint)
 void
 userret(struct proc *p)
 {
-       int sig;
+       int signum;
 
        /* send SIGPROF or SIGVTALRM if their timers interrupted this thread */
        if (p->p_flag & P_PROFPEND) {
@@ -1832,8 +1833,12 @@ userret(struct proc *p)
                KERNEL_UNLOCK();
        }
 
-       while ((sig = CURSIG(p)) != 0)
-               postsig(sig);
+       if (CURSIG(p) != 0) {
+               KERNEL_LOCK();
+               while ((signum = CURSIG(p)) != 0)
+                       postsig(p, signum);
+               KERNEL_UNLOCK();
+       }
 
        /*
         * If P_SIGSUSPEND is still set here, then we still need to restore
@@ -1845,8 +1850,10 @@ userret(struct proc *p)
                atomic_clearbits_int(&p->p_flag, P_SIGSUSPEND);
                p->p_sigmask = p->p_oldmask;
 
-               while ((sig = CURSIG(p)) != 0)
-                       postsig(sig);
+               KERNEL_LOCK();
+               while ((signum = CURSIG(p)) != 0)
+                       postsig(p, signum);
+               KERNEL_UNLOCK();
        }
 
        if (p->p_flag & P_SUSPSINGLE) {
Index: sys/signalvar.h
===================================================================
RCS file: /cvs/src/sys/sys/signalvar.h,v
retrieving revision 1.28
diff -u -p -r1.28 signalvar.h
--- sys/signalvar.h     5 May 2015 02:13:46 -0000       1.28
+++ sys/signalvar.h     21 Feb 2018 13:04:15 -0000
@@ -151,7 +151,6 @@ void        gsignal(int pgid, int sig);
 void   csignal(pid_t pgid, int signum, uid_t uid, uid_t euid);
 int    issignal(struct proc *p);
 void   pgsignal(struct pgrp *pgrp, int sig, int checkctty);
-void   postsig(int sig);
 void   psignal(struct proc *p, int sig);
 void   ptsignal(struct proc *p, int sig, enum signal_type type);
 #define prsignal(pr,sig)       ptsignal((pr)->ps_mainproc, (sig), SPROCESS)

Reply via email to