Diff below fixes a race in signal handling that can be triggered when a multi-threaded process installs a signal handler with SA_RESETHAND.
I described a case of this problem on bugs@ a month ago: https://marc.info/?l=openbsd-bugs&m=151672599508049&w=2 The race occurs when one of the threads is doing a NOLOCK syscall, like futex(2): CPU0: Before returning to userland this thread will call CURSIG() without KERNEL_LOCK(), issignal() will look at `ps_sigact', see that there's a handler and return the signal number. After that the thread will call postsig() and spin on the KERNEL_LOCK(). CPU1: In the meantime another thread, with the KERNEL_LOCK(), also looked at `ps_sigact' and is already processing the signal. Since the handler has been installed with SA_RESETHAND it will reset the action to SIG_DFL then release the KERNEL_LOCK(). CPU0: After grabbing the KERNEL_LOCK() inside postsig(), the thread tries to process an already processed signal, see that the actions is SIG_DFL and call sigexit() while all other threads are SSTOP'ed. Diff below fixes that by serializing changes to `ps_sigact' via the KERNEL_LOCK(). I also introduce postsig_done(), gypped from FreeBSD, to assert that the lock is held and to prepare for finer grained locking. Tests, oks? Index: kern/kern_sig.c =================================================================== RCS file: /cvs/src/sys/kern/kern_sig.c,v retrieving revision 1.215 diff -u -p -r1.215 kern_sig.c --- kern/kern_sig.c 19 Feb 2018 08:59:52 -0000 1.215 +++ kern/kern_sig.c 21 Feb 2018 13:09:54 -0000 @@ -82,6 +82,7 @@ void proc_stop(struct proc *p, int); void proc_stop_sweep(void *); struct timeout proc_stop_to; +void postsig(struct proc *, int); int cansignal(struct proc *, struct process *, int); struct pool sigacts_pool; /* memory pool for sigacts structures */ @@ -747,6 +748,27 @@ pgsignal(struct pgrp *pgrp, int signum, } /* + * Recalculate the signal mask and reset the signal disposition after + * usermode frame for delivery is formed. + */ +void +postsig_done(struct proc *p, int signum, struct sigacts *ps) +{ + int mask = sigmask(signum); + + KERNEL_ASSERT_LOCKED(); + + p->p_ru.ru_nsignals++; + atomic_setbits_int(&p->p_sigmask, ps->ps_catchmask[signum]); + if ((ps->ps_sigreset & mask) != 0) { + ps->ps_sigcatch &= ~mask; + if (signum != SIGCONT && sigprop[signum] & SA_IGNORE) + ps->ps_sigignore |= mask; + ps->ps_sigact[signum] = SIG_DFL; + } +} + +/* * Send a signal caused by a trap to the current thread * If it will be caught immediately, deliver it with correct code. * Otherwise, post it normally. @@ -780,16 +802,9 @@ trapsignal(struct proc *p, int signum, u p->p_sigmask, code, &si); } #endif - p->p_ru.ru_nsignals++; (*pr->ps_emul->e_sendsig)(ps->ps_sigact[signum], signum, p->p_sigmask, trapno, code, sigval); - atomic_setbits_int(&p->p_sigmask, ps->ps_catchmask[signum]); - if ((ps->ps_sigreset & mask) != 0) { - ps->ps_sigcatch &= ~mask; - if (signum != SIGCONT && sigprop[signum] & SA_IGNORE) - ps->ps_sigignore |= mask; - ps->ps_sigact[signum] = SIG_DFL; - } + postsig_done(p, signum, ps); } else { p->p_sisig = signum; p->p_sitrapno = trapno; /* XXX for core dump/debugger */ @@ -1330,9 +1345,8 @@ proc_stop_sweep(void *v) * from the current set of pending signals. */ void -postsig(int signum) +postsig(struct proc *p, int signum) { - struct proc *p = curproc; struct process *pr = p->p_p; struct sigacts *ps = pr->ps_sigacts; sig_t action; @@ -1341,12 +1355,8 @@ postsig(int signum) union sigval sigval; int s, code; -#ifdef DIAGNOSTIC - if (signum == 0) - panic("postsig"); -#endif - - KERNEL_LOCK(); + KASSERT(signum != 0); + KERNEL_ASSERT_LOCKED(); mask = sigmask(signum); atomic_clearbits_int(&p->p_siglist, mask); @@ -1366,7 +1376,7 @@ postsig(int signum) #ifdef KTRACE if (KTRPOINT(p, KTR_PSIG)) { siginfo_t si; - + initsiginfo(&si, signum, trapno, code, sigval); ktrpsig(p, signum, action, p->p_flag & P_SIGSUSPEND ? p->p_oldmask : p->p_sigmask, code, &si); @@ -1407,15 +1417,6 @@ postsig(int signum) } else { returnmask = p->p_sigmask; } - atomic_setbits_int(&p->p_sigmask, ps->ps_catchmask[signum]); - if ((ps->ps_sigreset & mask) != 0) { - ps->ps_sigcatch &= ~mask; - if (signum != SIGCONT && sigprop[signum] & SA_IGNORE) - ps->ps_sigignore |= mask; - ps->ps_sigact[signum] = SIG_DFL; - } - splx(s); - p->p_ru.ru_nsignals++; if (p->p_sisig == signum) { p->p_sisig = 0; p->p_sitrapno = 0; @@ -1425,9 +1426,9 @@ postsig(int signum) (*pr->ps_emul->e_sendsig)(action, signum, returnmask, trapno, code, sigval); + postsig_done(p, signum, ps); + splx(s); } - - KERNEL_UNLOCK(); } /* @@ -1816,7 +1817,7 @@ filt_signal(struct knote *kn, long hint) void userret(struct proc *p) { - int sig; + int signum; /* send SIGPROF or SIGVTALRM if their timers interrupted this thread */ if (p->p_flag & P_PROFPEND) { @@ -1832,8 +1833,12 @@ userret(struct proc *p) KERNEL_UNLOCK(); } - while ((sig = CURSIG(p)) != 0) - postsig(sig); + if (CURSIG(p) != 0) { + KERNEL_LOCK(); + while ((signum = CURSIG(p)) != 0) + postsig(p, signum); + KERNEL_UNLOCK(); + } /* * If P_SIGSUSPEND is still set here, then we still need to restore @@ -1845,8 +1850,10 @@ userret(struct proc *p) atomic_clearbits_int(&p->p_flag, P_SIGSUSPEND); p->p_sigmask = p->p_oldmask; - while ((sig = CURSIG(p)) != 0) - postsig(sig); + KERNEL_LOCK(); + while ((signum = CURSIG(p)) != 0) + postsig(p, signum); + KERNEL_UNLOCK(); } if (p->p_flag & P_SUSPSINGLE) { Index: sys/signalvar.h =================================================================== RCS file: /cvs/src/sys/sys/signalvar.h,v retrieving revision 1.28 diff -u -p -r1.28 signalvar.h --- sys/signalvar.h 5 May 2015 02:13:46 -0000 1.28 +++ sys/signalvar.h 21 Feb 2018 13:04:15 -0000 @@ -151,7 +151,6 @@ void gsignal(int pgid, int sig); void csignal(pid_t pgid, int signum, uid_t uid, uid_t euid); int issignal(struct proc *p); void pgsignal(struct pgrp *pgrp, int sig, int checkctty); -void postsig(int sig); void psignal(struct proc *p, int sig); void ptsignal(struct proc *p, int sig, enum signal_type type); #define prsignal(pr,sig) ptsignal((pr)->ps_mainproc, (sig), SPROCESS)
