On Fri, May 01, 2020 at 05:17:36PM +0200, Martin Pieuchot wrote:
> On 01/05/20(Fri) 12:13, Anton Lindqvist wrote:
> > The order in which the pty master/slave is closed seems to be the
> > trigger here. While not duping the master, it's closed before the slave.
> > In the opposite scenario, the slave is closed before the master. While
> > closing the slave, it ends up here expressed as a simplified backtrace:
> > 
> >   tsleep()
> >   ttysleep()
> >   ttywait()
> >   ttywflush()
> >   ttylclose()
> >   ptsclose()
> >   fdfree()
> >   exit1()
> > 
> > In order words, it ends up doing a tsleep(INFSLP) causing the thread to
> > hang. Note that this is not the case when the master is closed before
> > the slave since `tp->t_oproc == NULL' causing ttywait() to bail early.
> 
> Why is the sleeper never awaken?  Does that mean a ttwakeup() is missing?

In this case, the process is single threaded, about to exit and the only
consumer of the pty. I don't see how it could be any other process
responsibility to perform the wakeup.

> 
> > NetBSD does a sleep with a timeout in ttywflush(). I've applied the same
> > approach in the diff below which does fix the hang.
> 
> This seems like a racy workaround for a bug that we do not fully
> understand.  If this is a proper solution I'd be happy to understand
> why.  If we go with such fix we should be using a value in "nsecs"
> instead of ticks and INFSLP should be used instead of 0.  We should
> refrain from introducing new usages of `hz' ;)

This is corresponding commit message[1] from NetBSD which describes what
we're seeing:

  commit a5bbe319b2ea24c4c2133a51eac8a0285dc074e4
  Author: gson <[email protected]>
  Date:   Wed Aug 19 12:02:55 2015 +0000

      When closing a tty, limit the amount of time spent waiting for the
      output to drain to five seconds so that exiting processes with
      buffered output for a serial port blocked by flow control or a pty
      that is not being read do not hang indefinitely.  Should fix PRs
      kern/12534 and kern/17171.  This is an updated version of the change
      of tty.c 1.263.

Updated diff favoring INFSLP and SEC_TO_NSEC().

[1] https://github.com/IIJ-NetBSD/netbsd-src/commit/a5bbe319b2ea

Index: kern/kern_proc.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_proc.c,v
retrieving revision 1.86
diff -u -p -r1.86 kern_proc.c
--- kern/kern_proc.c    30 Jan 2020 08:51:27 -0000      1.86
+++ kern/kern_proc.c    2 May 2020 08:35:03 -0000
@@ -410,7 +410,7 @@ killjobc(struct process *pr)
                        if (sp->s_ttyp->t_session == sp) {
                                if (sp->s_ttyp->t_pgrp)
                                        pgsignal(sp->s_ttyp->t_pgrp, SIGHUP, 1);
-                               ttywait(sp->s_ttyp);
+                               ttywait(sp->s_ttyp, INFSLP);
                                /*
                                 * The tty could have been revoked
                                 * if we blocked.
Index: kern/tty.c
===================================================================
RCS file: /cvs/src/sys/kern/tty.c,v
retrieving revision 1.154
diff -u -p -r1.154 tty.c
--- kern/tty.c  7 Apr 2020 13:27:51 -0000       1.154
+++ kern/tty.c  2 May 2020 08:35:03 -0000
@@ -809,7 +809,7 @@ ttioctl(struct tty *tp, u_long cmd, cadd
                break;
        }
        case TIOCDRAIN:                 /* wait till output drained */
-               if ((error = ttywait(tp)) != 0)
+               if ((error = ttywait(tp, INFSLP)) != 0)
                        return (error);
                break;
        case TIOCGETA: {                /* get termios struct */
@@ -866,7 +866,7 @@ ttioctl(struct tty *tp, u_long cmd, cadd
 
                s = spltty();
                if (cmd == TIOCSETAW || cmd == TIOCSETAF) {
-                       if ((error = ttywait(tp)) != 0) {
+                       if ((error = ttywait(tp, INFSLP)) != 0) {
                                splx(s);
                                return (error);
                        }
@@ -1205,7 +1205,7 @@ ttnread(struct tty *tp)
  * Wait for output to drain.
  */
 int
-ttywait(struct tty *tp)
+ttywait(struct tty *tp, int timo)
 {
        int error, s;
 
@@ -1219,7 +1219,8 @@ ttywait(struct tty *tp)
                    (ISSET(tp->t_state, TS_CARR_ON) || ISSET(tp->t_cflag, 
CLOCAL))
                    && tp->t_oproc) {
                        SET(tp->t_state, TS_ASLEEP);
-                       error = ttysleep(tp, &tp->t_outq, TTOPRI | PCATCH, 
ttyout);
+                       error = ttysleep_timo(tp, &tp->t_outq, TTOPRI | PCATCH,
+                           ttyout, timo);
                        if (error)
                                break;
                } else
@@ -1237,7 +1238,8 @@ ttywflush(struct tty *tp)
 {
        int error;
 
-       if ((error = ttywait(tp)) == 0)
+       error = ttywait(tp, SEC_TO_NSEC(5));
+       if (error == 0 || error == EWOULDBLOCK)
                ttyflush(tp, FREAD);
        return (error);
 }
@@ -2281,11 +2283,18 @@ tputchar(int c, struct tty *tp)
 int
 ttysleep(struct tty *tp, void *chan, int pri, char *wmesg)
 {
+
+       return (ttysleep_timo(tp, chan, pri, wmesg, INFSLP));
+}
+
+int
+ttysleep_timo(struct tty *tp, void *chan, int pri, char *wmesg, int timo)
+{
        int error;
        short gen;
 
        gen = tp->t_gen;
-       if ((error = tsleep_nsec(chan, pri, wmesg, INFSLP)) != 0)
+       if ((error = tsleep_nsec(chan, pri, wmesg, timo)) != 0)
                return (error);
        return (tp->t_gen == gen ? 0 : ERESTART);
 }
Index: sys/tty.h
===================================================================
RCS file: /cvs/src/sys/sys/tty.h,v
retrieving revision 1.38
diff -u -p -r1.38 tty.h
--- sys/tty.h   19 Jul 2019 00:17:16 -0000      1.38
+++ sys/tty.h   2 May 2020 08:35:03 -0000
@@ -293,7 +293,9 @@ void         ttypend(struct tty *tp);
 void    ttyretype(struct tty *tp);
 void    ttyrub(int c, struct tty *tp);
 int     ttysleep(struct tty *tp, void *chan, int pri, char *wmesg);
-int     ttywait(struct tty *tp);
+int     ttysleep_timo(struct tty *tp, void *chan, int pri, char *wmesg,
+           int timo);
+int     ttywait(struct tty *tp, int timo);
 int     ttywflush(struct tty *tp);
 void    ttytstamp(struct tty *tp, int octs, int ncts, int odcd, int ndcd);
 

Reply via email to