On Wed, Feb 21, 2018 at 02:15:07PM +0100, Martin Pieuchot wrote: > Diff below fixes a race in signal handling that can be triggered when > a multi-threaded process installs a signal handler with SA_RESETHAND. > > I described a case of this problem on bugs@ a month ago: > https://marc.info/?l=openbsd-bugs&m=151672599508049&w=2 > > The race occurs when one of the threads is doing a NOLOCK syscall, like > futex(2): > > CPU0: > Before returning to userland this thread will call CURSIG() without > KERNEL_LOCK(), issignal() will look at `ps_sigact', see that there's > a handler and return the signal number. After that the thread will > call postsig() and spin on the KERNEL_LOCK(). > > CPU1: > In the meantime another thread, with the KERNEL_LOCK(), also looked at > `ps_sigact' and is already processing the signal. Since the handler > has been installed with SA_RESETHAND it will reset the action to > SIG_DFL then release the KERNEL_LOCK(). > > CPU0: > After grabbing the KERNEL_LOCK() inside postsig(), the thread tries > to process an already processed signal, see that the actions is > SIG_DFL and call sigexit() while all other threads are SSTOP'ed. > > > Diff below fixes that by serializing changes to `ps_sigact' via the > KERNEL_LOCK(). I also introduce postsig_done(), gypped from FreeBSD, > to assert that the lock is held and to prepare for finer grained > locking. > > Tests, oks? > > Index: kern/kern_sig.c > =================================================================== > RCS file: /cvs/src/sys/kern/kern_sig.c,v > retrieving revision 1.215 > diff -u -p -r1.215 kern_sig.c > --- kern/kern_sig.c 19 Feb 2018 08:59:52 -0000 1.215 > +++ kern/kern_sig.c 21 Feb 2018 13:09:54 -0000 > @@ -82,6 +82,7 @@ void proc_stop(struct proc *p, int); > void proc_stop_sweep(void *); > struct timeout proc_stop_to; > > +void postsig(struct proc *, int); > int cansignal(struct proc *, struct process *, int); > > struct pool sigacts_pool; /* memory pool for sigacts structures */ > @@ -747,6 +748,27 @@ pgsignal(struct pgrp *pgrp, int signum, > } > > /* > + * Recalculate the signal mask and reset the signal disposition after > + * usermode frame for delivery is formed. > + */ > +void > +postsig_done(struct proc *p, int signum, struct sigacts *ps) > +{ > + int mask = sigmask(signum); > + > + KERNEL_ASSERT_LOCKED(); > + > + p->p_ru.ru_nsignals++; > + atomic_setbits_int(&p->p_sigmask, ps->ps_catchmask[signum]); > + if ((ps->ps_sigreset & mask) != 0) { > + ps->ps_sigcatch &= ~mask; > + if (signum != SIGCONT && sigprop[signum] & SA_IGNORE) > + ps->ps_sigignore |= mask; > + ps->ps_sigact[signum] = SIG_DFL; > + } > +} > + > +/* > * Send a signal caused by a trap to the current thread > * If it will be caught immediately, deliver it with correct code. > * Otherwise, post it normally. > @@ -780,16 +802,9 @@ trapsignal(struct proc *p, int signum, u > p->p_sigmask, code, &si); > } > #endif > - p->p_ru.ru_nsignals++; > (*pr->ps_emul->e_sendsig)(ps->ps_sigact[signum], signum, > p->p_sigmask, trapno, code, sigval); > - atomic_setbits_int(&p->p_sigmask, ps->ps_catchmask[signum]); > - if ((ps->ps_sigreset & mask) != 0) { > - ps->ps_sigcatch &= ~mask; > - if (signum != SIGCONT && sigprop[signum] & SA_IGNORE) > - ps->ps_sigignore |= mask; > - ps->ps_sigact[signum] = SIG_DFL; > - } > + postsig_done(p, signum, ps); > } else { > p->p_sisig = signum; > p->p_sitrapno = trapno; /* XXX for core dump/debugger */ > @@ -1330,9 +1345,8 @@ proc_stop_sweep(void *v) > * from the current set of pending signals. > */ > void > -postsig(int signum) > +postsig(struct proc *p, int signum) > { > - struct proc *p = curproc; > struct process *pr = p->p_p; > struct sigacts *ps = pr->ps_sigacts; > sig_t action; > @@ -1341,12 +1355,8 @@ postsig(int signum) > union sigval sigval; > int s, code; > > -#ifdef DIAGNOSTIC > - if (signum == 0) > - panic("postsig"); > -#endif > - > - KERNEL_LOCK(); > + KASSERT(signum != 0); > + KERNEL_ASSERT_LOCKED(); > > mask = sigmask(signum); > atomic_clearbits_int(&p->p_siglist, mask); > @@ -1366,7 +1376,7 @@ postsig(int signum) > #ifdef KTRACE > if (KTRPOINT(p, KTR_PSIG)) { > siginfo_t si; > - > + > initsiginfo(&si, signum, trapno, code, sigval); > ktrpsig(p, signum, action, p->p_flag & P_SIGSUSPEND ? > p->p_oldmask : p->p_sigmask, code, &si); > @@ -1407,15 +1417,6 @@ postsig(int signum) > } else { > returnmask = p->p_sigmask; > } > - atomic_setbits_int(&p->p_sigmask, ps->ps_catchmask[signum]); > - if ((ps->ps_sigreset & mask) != 0) { > - ps->ps_sigcatch &= ~mask; > - if (signum != SIGCONT && sigprop[signum] & SA_IGNORE) > - ps->ps_sigignore |= mask; > - ps->ps_sigact[signum] = SIG_DFL; > - } > - splx(s); > - p->p_ru.ru_nsignals++; > if (p->p_sisig == signum) { > p->p_sisig = 0; > p->p_sitrapno = 0; > @@ -1425,9 +1426,9 @@ postsig(int signum) > > (*pr->ps_emul->e_sendsig)(action, signum, returnmask, trapno, > code, sigval); > + postsig_done(p, signum, ps); > + splx(s); > } > - > - KERNEL_UNLOCK(); > } > > /* > @@ -1816,7 +1817,7 @@ filt_signal(struct knote *kn, long hint) > void > userret(struct proc *p) > { > - int sig; > + int signum; > > /* send SIGPROF or SIGVTALRM if their timers interrupted this thread */ > if (p->p_flag & P_PROFPEND) { > @@ -1832,8 +1833,12 @@ userret(struct proc *p) > KERNEL_UNLOCK(); > } > > - while ((sig = CURSIG(p)) != 0) > - postsig(sig); > + if (CURSIG(p) != 0) { > + KERNEL_LOCK(); > + while ((signum = CURSIG(p)) != 0) > + postsig(p, signum); > + KERNEL_UNLOCK(); > + } > > /* > * If P_SIGSUSPEND is still set here, then we still need to restore > @@ -1845,8 +1850,10 @@ userret(struct proc *p) > atomic_clearbits_int(&p->p_flag, P_SIGSUSPEND); > p->p_sigmask = p->p_oldmask; > > - while ((sig = CURSIG(p)) != 0) > - postsig(sig); > + KERNEL_LOCK(); > + while ((signum = CURSIG(p)) != 0) > + postsig(p, signum); > + KERNEL_UNLOCK(); > } > > if (p->p_flag & P_SUSPSINGLE) { > Index: sys/signalvar.h > =================================================================== > RCS file: /cvs/src/sys/sys/signalvar.h,v > retrieving revision 1.28 > diff -u -p -r1.28 signalvar.h > --- sys/signalvar.h 5 May 2015 02:13:46 -0000 1.28 > +++ sys/signalvar.h 21 Feb 2018 13:04:15 -0000 > @@ -151,7 +151,6 @@ void gsignal(int pgid, int sig); > void csignal(pid_t pgid, int signum, uid_t uid, uid_t euid); > int issignal(struct proc *p); > void pgsignal(struct pgrp *pgrp, int sig, int checkctty); > -void postsig(int sig); > void psignal(struct proc *p, int sig); > void ptsignal(struct proc *p, int sig, enum signal_type type); > #define prsignal(pr,sig) ptsignal((pr)->ps_mainproc, (sig), SPROCESS) >
Hi, I tried your patch on a source build of about 25 februari on amd64. I still notice mpv hanging when I try to close it or when the video ends. Let me know how/if I can help you further with testing. -- Kind regards, Hiltjo
