On Fri, Mar 02, 2018 at 02:18:46PM +0100, Hiltjo Posthuma wrote:
> On Wed, Feb 21, 2018 at 02:15:07PM +0100, Martin Pieuchot wrote:
> > Diff below fixes a race in signal handling that can be triggered when
> > a multi-threaded process installs a signal handler with SA_RESETHAND.
> > 
> > I described a case of this problem on bugs@ a month ago:
> >   https://marc.info/?l=openbsd-bugs&m=151672599508049&w=2
> > 
> > The race occurs when one of the threads is doing a NOLOCK syscall, like
> > futex(2):
> > 
> > CPU0:
> >   Before returning to userland this thread will call CURSIG() without
> >   KERNEL_LOCK(), issignal() will look at `ps_sigact', see that there's
> >   a handler and return the signal number.  After that the thread will
> >   call postsig() and spin on the KERNEL_LOCK().
> > 
> > CPU1:
> >   In the meantime another thread, with the KERNEL_LOCK(), also looked at
> >   `ps_sigact' and is already processing the signal.  Since the handler
> >   has been installed with SA_RESETHAND it will reset the action to
> >   SIG_DFL then release the KERNEL_LOCK().
> > 
> > CPU0:
> >   After grabbing the KERNEL_LOCK() inside postsig(), the thread tries
> >   to process an already processed signal, see that the actions is
> >   SIG_DFL and call sigexit() while all other threads are SSTOP'ed.
> > 
> > 
> > Diff below fixes that by serializing changes to `ps_sigact' via the
> > KERNEL_LOCK().  I also introduce postsig_done(), gypped from FreeBSD,
> > to assert that the lock is held and to prepare for finer grained
> > locking.
> > 
> > Tests, oks?
> > 
> > Index: kern/kern_sig.c
> > ===================================================================
> > RCS file: /cvs/src/sys/kern/kern_sig.c,v
> > retrieving revision 1.215
> > diff -u -p -r1.215 kern_sig.c
> > --- kern/kern_sig.c 19 Feb 2018 08:59:52 -0000      1.215
> > +++ kern/kern_sig.c 21 Feb 2018 13:09:54 -0000
> > @@ -82,6 +82,7 @@ void proc_stop(struct proc *p, int);
> >  void proc_stop_sweep(void *);
> >  struct timeout proc_stop_to;
> >  
> > +void postsig(struct proc *, int);
> >  int cansignal(struct proc *, struct process *, int);
> >  
> >  struct pool sigacts_pool;  /* memory pool for sigacts structures */
> > @@ -747,6 +748,27 @@ pgsignal(struct pgrp *pgrp, int signum, 
> >  }
> >  
> >  /*
> > + * Recalculate the signal mask and reset the signal disposition after
> > + * usermode frame for delivery is formed.
> > + */
> > +void
> > +postsig_done(struct proc *p, int signum, struct sigacts *ps)
> > +{
> > +   int mask = sigmask(signum);
> > +
> > +   KERNEL_ASSERT_LOCKED();
> > +
> > +   p->p_ru.ru_nsignals++;
> > +   atomic_setbits_int(&p->p_sigmask, ps->ps_catchmask[signum]);
> > +   if ((ps->ps_sigreset & mask) != 0) {
> > +           ps->ps_sigcatch &= ~mask;
> > +           if (signum != SIGCONT && sigprop[signum] & SA_IGNORE)
> > +                   ps->ps_sigignore |= mask;
> > +           ps->ps_sigact[signum] = SIG_DFL;
> > +   }
> > +}
> > +
> > +/*
> >   * Send a signal caused by a trap to the current thread
> >   * If it will be caught immediately, deliver it with correct code.
> >   * Otherwise, post it normally.
> > @@ -780,16 +802,9 @@ trapsignal(struct proc *p, int signum, u
> >                         p->p_sigmask, code, &si);
> >             }
> >  #endif
> > -           p->p_ru.ru_nsignals++;
> >             (*pr->ps_emul->e_sendsig)(ps->ps_sigact[signum], signum,
> >                 p->p_sigmask, trapno, code, sigval);
> > -           atomic_setbits_int(&p->p_sigmask, ps->ps_catchmask[signum]);
> > -           if ((ps->ps_sigreset & mask) != 0) {
> > -                   ps->ps_sigcatch &= ~mask;
> > -                   if (signum != SIGCONT && sigprop[signum] & SA_IGNORE)
> > -                           ps->ps_sigignore |= mask;
> > -                   ps->ps_sigact[signum] = SIG_DFL;
> > -           }
> > +           postsig_done(p, signum, ps);
> >     } else {
> >             p->p_sisig = signum;
> >             p->p_sitrapno = trapno; /* XXX for core dump/debugger */
> > @@ -1330,9 +1345,8 @@ proc_stop_sweep(void *v)
> >   * from the current set of pending signals.
> >   */
> >  void
> > -postsig(int signum)
> > +postsig(struct proc *p, int signum)
> >  {
> > -   struct proc *p = curproc;
> >     struct process *pr = p->p_p;
> >     struct sigacts *ps = pr->ps_sigacts;
> >     sig_t action;
> > @@ -1341,12 +1355,8 @@ postsig(int signum)
> >     union sigval sigval;
> >     int s, code;
> >  
> > -#ifdef DIAGNOSTIC
> > -   if (signum == 0)
> > -           panic("postsig");
> > -#endif
> > -
> > -   KERNEL_LOCK();
> > +   KASSERT(signum != 0);
> > +   KERNEL_ASSERT_LOCKED();
> >  
> >     mask = sigmask(signum);
> >     atomic_clearbits_int(&p->p_siglist, mask);
> > @@ -1366,7 +1376,7 @@ postsig(int signum)
> >  #ifdef KTRACE
> >     if (KTRPOINT(p, KTR_PSIG)) {
> >             siginfo_t si;
> > -           
> > +
> >             initsiginfo(&si, signum, trapno, code, sigval);
> >             ktrpsig(p, signum, action, p->p_flag & P_SIGSUSPEND ?
> >                 p->p_oldmask : p->p_sigmask, code, &si);
> > @@ -1407,15 +1417,6 @@ postsig(int signum)
> >             } else {
> >                     returnmask = p->p_sigmask;
> >             }
> > -           atomic_setbits_int(&p->p_sigmask, ps->ps_catchmask[signum]);
> > -           if ((ps->ps_sigreset & mask) != 0) {
> > -                   ps->ps_sigcatch &= ~mask;
> > -                   if (signum != SIGCONT && sigprop[signum] & SA_IGNORE)
> > -                           ps->ps_sigignore |= mask;
> > -                   ps->ps_sigact[signum] = SIG_DFL;
> > -           }
> > -           splx(s);
> > -           p->p_ru.ru_nsignals++;
> >             if (p->p_sisig == signum) {
> >                     p->p_sisig = 0;
> >                     p->p_sitrapno = 0;
> > @@ -1425,9 +1426,9 @@ postsig(int signum)
> >  
> >             (*pr->ps_emul->e_sendsig)(action, signum, returnmask, trapno,
> >                 code, sigval);
> > +           postsig_done(p, signum, ps);
> > +           splx(s);
> >     }
> > -
> > -   KERNEL_UNLOCK();
> >  }
> >  
> >  /*
> > @@ -1816,7 +1817,7 @@ filt_signal(struct knote *kn, long hint)
> >  void
> >  userret(struct proc *p)
> >  {
> > -   int sig;
> > +   int signum;
> >  
> >     /* send SIGPROF or SIGVTALRM if their timers interrupted this thread */
> >     if (p->p_flag & P_PROFPEND) {
> > @@ -1832,8 +1833,12 @@ userret(struct proc *p)
> >             KERNEL_UNLOCK();
> >     }
> >  
> > -   while ((sig = CURSIG(p)) != 0)
> > -           postsig(sig);
> > +   if (CURSIG(p) != 0) {
> > +           KERNEL_LOCK();
> > +           while ((signum = CURSIG(p)) != 0)
> > +                   postsig(p, signum);
> > +           KERNEL_UNLOCK();
> > +   }
> >  
> >     /*
> >      * If P_SIGSUSPEND is still set here, then we still need to restore
> > @@ -1845,8 +1850,10 @@ userret(struct proc *p)
> >             atomic_clearbits_int(&p->p_flag, P_SIGSUSPEND);
> >             p->p_sigmask = p->p_oldmask;
> >  
> > -           while ((sig = CURSIG(p)) != 0)
> > -                   postsig(sig);
> > +           KERNEL_LOCK();
> > +           while ((signum = CURSIG(p)) != 0)
> > +                   postsig(p, signum);
> > +           KERNEL_UNLOCK();
> >     }
> >  
> >     if (p->p_flag & P_SUSPSINGLE) {
> > Index: sys/signalvar.h
> > ===================================================================
> > RCS file: /cvs/src/sys/sys/signalvar.h,v
> > retrieving revision 1.28
> > diff -u -p -r1.28 signalvar.h
> > --- sys/signalvar.h 5 May 2015 02:13:46 -0000       1.28
> > +++ sys/signalvar.h 21 Feb 2018 13:04:15 -0000
> > @@ -151,7 +151,6 @@ void    gsignal(int pgid, int sig);
> >  void       csignal(pid_t pgid, int signum, uid_t uid, uid_t euid);
> >  int        issignal(struct proc *p);
> >  void       pgsignal(struct pgrp *pgrp, int sig, int checkctty);
> > -void       postsig(int sig);
> >  void       psignal(struct proc *p, int sig);
> >  void       ptsignal(struct proc *p, int sig, enum signal_type type);
> >  #define prsignal(pr,sig)   ptsignal((pr)->ps_mainproc, (sig), SPROCESS)
> > 
> 
> Hi,
> 
> I tried your patch on a source build of about 25 februari on amd64. I still
> notice mpv hanging when I try to close it or when the video ends.
> 
> Let me know how/if I can help you further with testing.
> 

Updated to source of ~4 March:

OpenBSD ren.laptop 6.3 GENERIC.MP#0 amd64

When I do a backtrace on the hanged mpv I get:


$ gdb mpv pid
$ bt

#0  _thread_sys___thrsleep () at -:3
#1  0x0000117420965ea4 in _sem_wait (sem=0x1174219ffc40, tryonly=564132932, 
abstime=0x0,
    delayed_cancel=0x11738fa402f0) at /usr/src/lib/librthread/rthread_sem.c:74
#2  0x00001174209650bf in pthread_join (thread=0x1174219ffc40, retval=0x0)
    at /usr/src/lib/librthread/rthread.c:304
#3  0x00001173ec0a9d5b in SDL_WaitThread_REAL () from 
/usr/local/lib/libSDL2.so.0.4
#4  0x00001173ec05b64a in close_audio_device () from 
/usr/local/lib/libSDL2.so.0.4
#5  0x00001173ec05a1e2 in SDL_AudioQuit_REAL () from 
/usr/local/lib/libSDL2.so.0.4
#6  0x00001173ec05703b in SDL_QuitSubSystem_REAL () from 
/usr/local/lib/libSDL2.so.0.4
#7  0x000011713bf135e9 in ao_uninit (ao=0x11740b004740) at ../audio/out/ao.c:336
#8  0x000011713bf561fe in uninit_audio_out (mpctx=0x1173a2d8b840) at 
../player/audio.c:280
#9  0x000011713bf6efd2 in mp_destroy (mpctx=0x1173a2d8b840) at 
../player/main.c:166
#10 0x000011713bf6fbe1 in mpv_main (argc=Variable "argc" is not available.
) at ../player/main.c:243
#11 0x000011713bf03156 in _start () from /usr/local/bin/mpv
#12 0x0000000000000000 in ?? ()
(gdb)


I hope this helps in some way (no patch, sorry :)),

-- 
Kind regards,
Hiltjo

Reply via email to