On Wed, Feb 21, 2018 at 02:15:07PM +0100, Martin Pieuchot wrote:
> Diff below fixes a race in signal handling that can be triggered when
> a multi-threaded process installs a signal handler with SA_RESETHAND.
> 
> I described a case of this problem on bugs@ a month ago:
>   https://marc.info/?l=openbsd-bugs&m=151672599508049&w=2
> 
> The race occurs when one of the threads is doing a NOLOCK syscall, like
> futex(2):
> 
> CPU0:
>   Before returning to userland this thread will call CURSIG() without
>   KERNEL_LOCK(), issignal() will look at `ps_sigact', see that there's
>   a handler and return the signal number.  After that the thread will
>   call postsig() and spin on the KERNEL_LOCK().
> 
> CPU1:
>   In the meantime another thread, with the KERNEL_LOCK(), also looked at
>   `ps_sigact' and is already processing the signal.  Since the handler
>   has been installed with SA_RESETHAND it will reset the action to
>   SIG_DFL then release the KERNEL_LOCK().
> 
> CPU0:
>   After grabbing the KERNEL_LOCK() inside postsig(), the thread tries
>   to process an already processed signal, see that the actions is
>   SIG_DFL and call sigexit() while all other threads are SSTOP'ed.
> 
> 
> Diff below fixes that by serializing changes to `ps_sigact' via the
> KERNEL_LOCK().  I also introduce postsig_done(), gypped from FreeBSD,
> to assert that the lock is held and to prepare for finer grained
> locking.
> 
> Tests, oks?
> 
> Index: kern/kern_sig.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_sig.c,v
> retrieving revision 1.215
> diff -u -p -r1.215 kern_sig.c
> --- kern/kern_sig.c   19 Feb 2018 08:59:52 -0000      1.215
> +++ kern/kern_sig.c   21 Feb 2018 13:09:54 -0000
> @@ -82,6 +82,7 @@ void proc_stop(struct proc *p, int);
>  void proc_stop_sweep(void *);
>  struct timeout proc_stop_to;
>  
> +void postsig(struct proc *, int);
>  int cansignal(struct proc *, struct process *, int);
>  
>  struct pool sigacts_pool;    /* memory pool for sigacts structures */
> @@ -747,6 +748,27 @@ pgsignal(struct pgrp *pgrp, int signum, 
>  }
>  
>  /*
> + * Recalculate the signal mask and reset the signal disposition after
> + * usermode frame for delivery is formed.
> + */
> +void
> +postsig_done(struct proc *p, int signum, struct sigacts *ps)
> +{
> +     int mask = sigmask(signum);
> +
> +     KERNEL_ASSERT_LOCKED();
> +
> +     p->p_ru.ru_nsignals++;
> +     atomic_setbits_int(&p->p_sigmask, ps->ps_catchmask[signum]);
> +     if ((ps->ps_sigreset & mask) != 0) {
> +             ps->ps_sigcatch &= ~mask;
> +             if (signum != SIGCONT && sigprop[signum] & SA_IGNORE)
> +                     ps->ps_sigignore |= mask;
> +             ps->ps_sigact[signum] = SIG_DFL;
> +     }
> +}
> +
> +/*
>   * Send a signal caused by a trap to the current thread
>   * If it will be caught immediately, deliver it with correct code.
>   * Otherwise, post it normally.
> @@ -780,16 +802,9 @@ trapsignal(struct proc *p, int signum, u
>                           p->p_sigmask, code, &si);
>               }
>  #endif
> -             p->p_ru.ru_nsignals++;
>               (*pr->ps_emul->e_sendsig)(ps->ps_sigact[signum], signum,
>                   p->p_sigmask, trapno, code, sigval);
> -             atomic_setbits_int(&p->p_sigmask, ps->ps_catchmask[signum]);
> -             if ((ps->ps_sigreset & mask) != 0) {
> -                     ps->ps_sigcatch &= ~mask;
> -                     if (signum != SIGCONT && sigprop[signum] & SA_IGNORE)
> -                             ps->ps_sigignore |= mask;
> -                     ps->ps_sigact[signum] = SIG_DFL;
> -             }
> +             postsig_done(p, signum, ps);
>       } else {
>               p->p_sisig = signum;
>               p->p_sitrapno = trapno; /* XXX for core dump/debugger */
> @@ -1330,9 +1345,8 @@ proc_stop_sweep(void *v)
>   * from the current set of pending signals.
>   */
>  void
> -postsig(int signum)
> +postsig(struct proc *p, int signum)
>  {
> -     struct proc *p = curproc;
>       struct process *pr = p->p_p;
>       struct sigacts *ps = pr->ps_sigacts;
>       sig_t action;
> @@ -1341,12 +1355,8 @@ postsig(int signum)
>       union sigval sigval;
>       int s, code;
>  
> -#ifdef DIAGNOSTIC
> -     if (signum == 0)
> -             panic("postsig");
> -#endif
> -
> -     KERNEL_LOCK();
> +     KASSERT(signum != 0);
> +     KERNEL_ASSERT_LOCKED();
>  
>       mask = sigmask(signum);
>       atomic_clearbits_int(&p->p_siglist, mask);
> @@ -1366,7 +1376,7 @@ postsig(int signum)
>  #ifdef KTRACE
>       if (KTRPOINT(p, KTR_PSIG)) {
>               siginfo_t si;
> -             
> +
>               initsiginfo(&si, signum, trapno, code, sigval);
>               ktrpsig(p, signum, action, p->p_flag & P_SIGSUSPEND ?
>                   p->p_oldmask : p->p_sigmask, code, &si);
> @@ -1407,15 +1417,6 @@ postsig(int signum)
>               } else {
>                       returnmask = p->p_sigmask;
>               }
> -             atomic_setbits_int(&p->p_sigmask, ps->ps_catchmask[signum]);
> -             if ((ps->ps_sigreset & mask) != 0) {
> -                     ps->ps_sigcatch &= ~mask;
> -                     if (signum != SIGCONT && sigprop[signum] & SA_IGNORE)
> -                             ps->ps_sigignore |= mask;
> -                     ps->ps_sigact[signum] = SIG_DFL;
> -             }
> -             splx(s);
> -             p->p_ru.ru_nsignals++;
>               if (p->p_sisig == signum) {
>                       p->p_sisig = 0;
>                       p->p_sitrapno = 0;
> @@ -1425,9 +1426,9 @@ postsig(int signum)
>  
>               (*pr->ps_emul->e_sendsig)(action, signum, returnmask, trapno,
>                   code, sigval);
> +             postsig_done(p, signum, ps);
> +             splx(s);
>       }
> -
> -     KERNEL_UNLOCK();
>  }
>  
>  /*
> @@ -1816,7 +1817,7 @@ filt_signal(struct knote *kn, long hint)
>  void
>  userret(struct proc *p)
>  {
> -     int sig;
> +     int signum;
>  
>       /* send SIGPROF or SIGVTALRM if their timers interrupted this thread */
>       if (p->p_flag & P_PROFPEND) {
> @@ -1832,8 +1833,12 @@ userret(struct proc *p)
>               KERNEL_UNLOCK();
>       }
>  
> -     while ((sig = CURSIG(p)) != 0)
> -             postsig(sig);
> +     if (CURSIG(p) != 0) {
> +             KERNEL_LOCK();
> +             while ((signum = CURSIG(p)) != 0)
> +                     postsig(p, signum);
> +             KERNEL_UNLOCK();
> +     }
>  
>       /*
>        * If P_SIGSUSPEND is still set here, then we still need to restore
> @@ -1845,8 +1850,10 @@ userret(struct proc *p)
>               atomic_clearbits_int(&p->p_flag, P_SIGSUSPEND);
>               p->p_sigmask = p->p_oldmask;
>  
> -             while ((sig = CURSIG(p)) != 0)
> -                     postsig(sig);
> +             KERNEL_LOCK();
> +             while ((signum = CURSIG(p)) != 0)
> +                     postsig(p, signum);
> +             KERNEL_UNLOCK();
>       }
>  
>       if (p->p_flag & P_SUSPSINGLE) {
> Index: sys/signalvar.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/signalvar.h,v
> retrieving revision 1.28
> diff -u -p -r1.28 signalvar.h
> --- sys/signalvar.h   5 May 2015 02:13:46 -0000       1.28
> +++ sys/signalvar.h   21 Feb 2018 13:04:15 -0000
> @@ -151,7 +151,6 @@ void      gsignal(int pgid, int sig);
>  void csignal(pid_t pgid, int signum, uid_t uid, uid_t euid);
>  int  issignal(struct proc *p);
>  void pgsignal(struct pgrp *pgrp, int sig, int checkctty);
> -void postsig(int sig);
>  void psignal(struct proc *p, int sig);
>  void ptsignal(struct proc *p, int sig, enum signal_type type);
>  #define prsignal(pr,sig)     ptsignal((pr)->ps_mainproc, (sig), SPROCESS)
> 

Hi,

I tried your patch on a source build of about 25 februari on amd64. I still
notice mpv hanging when I try to close it or when the video ends.

Let me know how/if I can help you further with testing.

-- 
Kind regards,
Hiltjo

Reply via email to