On Wed, Jun 03, 2009 at 05:30:51PM +0300, Kostik Belousov wrote:
> On Wed, Jun 03, 2009 at 04:10:34PM +0300, Vlad Galu wrote:
> > Hm, I was having an issue with an internal piece of software, but
> > never checked what kind of pipe caused the problem. Turns out it was a
> > FIFO, and I got bitten by the same bug described here:
> > http://lists.freebsd.org/pipermail/freebsd-bugs/2006-March/017591.html
> > 
> > The problem is that the reader process isn't notified when the writer
> > process exits or closes the FIFO fd...
> 
> So you did found the relevant PR with long audit trail and patches
> attached. You obviously should contact the author of the patches,
> Oliver Fromme, who is FreeBSD committer for some time (CCed).
> 
> I agree that the thing shall be fixed finally. Skimming over the
> patches in kern/94772, I have some doubts about removal of
> POLLINIGNEOF flag. The reason is that we are generally do not
> remove exposed user interfaces.

I forward-ported Bruce' patch to the CURRENT. It passes the tests
from tools/regression/fifo and a test from kern/94772.
For my liking, I did not removed POLLINIGNEOF.

diff --git a/sys/fs/fifofs/fifo_vnops.c b/sys/fs/fifofs/fifo_vnops.c
index 66963bc..7e279ca 100644
--- a/sys/fs/fifofs/fifo_vnops.c
+++ b/sys/fs/fifofs/fifo_vnops.c
@@ -226,11 +226,47 @@ fail1:
        if (ap->a_mode & FREAD) {
                fip->fi_readers++;
                if (fip->fi_readers == 1) {
+                       SOCKBUF_LOCK(&fip->fi_readsock->so_rcv);
+                       if (fip->fi_writers > 0)
+                               fip->fi_readsock->so_rcv.sb_state |=
+                                   SBS_COULDRCV;
+                       else
+                               /*
+                                * Sloppy?  Might be necessary to clear it
+                                * in all the places where fi_readers is
+                                * decremented to 0.  I think only writers
+                                * polling for input could be confused by
+                                * having it not set, and there is a problem
+                                * with these anyway now that we have
+                                * reversed the sense of the flag -- they
+                                * now block (?), but shouldn't.
+                                */
+                               fip->fi_readsock->so_rcv.sb_state &=
+                                   ~SBS_COULDRCV;
+                       SOCKBUF_UNLOCK(&fip->fi_readsock->so_rcv);
                        SOCKBUF_LOCK(&fip->fi_writesock->so_snd);
                        fip->fi_writesock->so_snd.sb_state &= ~SBS_CANTSENDMORE;
                        SOCKBUF_UNLOCK(&fip->fi_writesock->so_snd);
                        if (fip->fi_writers > 0) {
                                wakeup(&fip->fi_writers);
+                       SOCKBUF_LOCK(&fip->fi_readsock->so_rcv);
+                       if (fip->fi_writers > 0)
+                               fip->fi_readsock->so_rcv.sb_state |=
+                                   SBS_COULDRCV;
+                       else
+                               /*
+                                * Sloppy?  Might be necessary to clear it
+                                * in all the places where fi_readers is
+                                * decremented to 0.  I think only writers
+                                * polling for input could be confused by
+                                * having it not set, and there is a problem
+                                * with these anyway now that we have
+                                * reversed the sense of the flag -- they
+                                * now block (?), but shouldn't.
+                                */
+                               fip->fi_readsock->so_rcv.sb_state &=
+                                   ~SBS_COULDRCV;
+                       SOCKBUF_UNLOCK(&fip->fi_readsock->so_rcv);
                                sowwakeup(fip->fi_writesock);
                        }
                }
@@ -244,6 +280,7 @@ fail1:
                if (fip->fi_writers == 1) {
                        SOCKBUF_LOCK(&fip->fi_readsock->so_rcv);
                        fip->fi_readsock->so_rcv.sb_state &= ~SBS_CANTRCVMORE;
+                       fip->fi_readsock->so_rcv.sb_state |= SBS_COULDRCV;
                        SOCKBUF_UNLOCK(&fip->fi_readsock->so_rcv);
                        if (fip->fi_readers > 0) {
                                wakeup(&fip->fi_readers);
@@ -672,28 +709,10 @@ fifo_poll_f(struct file *fp, int events, struct ucred 
*cred, struct thread *td)
        levents = events &
            (POLLIN | POLLINIGNEOF | POLLPRI | POLLRDNORM | POLLRDBAND);
        if ((fp->f_flag & FREAD) && levents) {
-               /*
-                * If POLLIN or POLLRDNORM is requested and POLLINIGNEOF is
-                * not, then convert the first two to the last one.  This
-                * tells the socket poll function to ignore EOF so that we
-                * block if there is no writer (and no data).  Callers can
-                * set POLLINIGNEOF to get non-blocking behavior.
-                */
-               if (levents & (POLLIN | POLLRDNORM) &&
-                   !(levents & POLLINIGNEOF)) {
-                       levents &= ~(POLLIN | POLLRDNORM);
-                       levents |= POLLINIGNEOF;
-               }
-
                filetmp.f_data = fip->fi_readsock;
                filetmp.f_cred = cred;
-               revents |= soo_poll(&filetmp, levents, cred, td);
-
-               /* Reverse the above conversion. */
-               if ((revents & POLLINIGNEOF) && !(events & POLLINIGNEOF)) {
-                       revents |= (events & (POLLIN | POLLRDNORM));
-                       revents &= ~POLLINIGNEOF;
-               }
+               revents |= soo_poll(&filetmp, levents | (events & POLLPOLL),
+                   cred, td);
        }
        levents = events & (POLLOUT | POLLWRNORM | POLLWRBAND);
        if ((fp->f_flag & FWRITE) && levents) {
diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c
index 616c5b7..98ccc9e 100644
--- a/sys/kern/sys_generic.c
+++ b/sys/kern/sys_generic.c
@@ -1226,8 +1226,8 @@ pollscan(td, fds, nfd)
                                 * POLLERR if appropriate.
                                 */
                                selfdalloc(td, fds);
-                               fds->revents = fo_poll(fp, fds->events,
-                                   td->td_ucred, td);
+                               fds->revents = fo_poll(fp,
+                                   fds->events | POLLPOLL, td->td_ucred, td);
                                if (fds->revents != 0)
                                        n++;
                        }
diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
index 7341d3f..a13d648 100644
--- a/sys/kern/uipc_socket.c
+++ b/sys/kern/uipc_socket.c
@@ -2706,6 +2706,42 @@ sopoll_generic(struct socket *so, int events, struct 
ucred *active_cred,
                if (sowriteable(so))
                        revents |= events & (POLLOUT | POLLWRNORM);
 
+       /*
+        * SBS_CANTRCVMORE (which is checked by soreadable()) normally
+        * implies EOF (and thus readable) and hung up, but for
+        * compatibility with other systems and to obtain behavior that
+        * is otherwise unavailable we make the case of poll() on a fifo
+        * that has never had any writers during the lifetime of any
+        * current reader special: then we pretend that the fifo is
+        * unreadable unless it contains non-null data, and that it is
+        * not hung up.  The POLLPOLL flag is set by poll() to identify
+        * poll() here, and the SBS_COULDRCV flag is set by the fifo
+        * layer to indicate a fifo that is not in the special state.
+        */
+       if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
+               if (!(events & POLLPOLL) || so->so_rcv.sb_state & SBS_COULDRCV)
+                       revents |= POLLHUP;     /* finish settings */
+               else if (!(so->so_rcv.sb_cc >= so->so_rcv.sb_lowat ||
+                   !TAILQ_EMPTY(&so->so_comp) || so->so_error))
+                       revents &= ~(POLLIN | POLLRDNORM); /* undo settings */
+       }
+
+       /*
+        * Testing of hangup for writers could be optimized by combining
+        * it with testing for writeability, but we keep the test separate
+        * and with the same organization as the test for readers for
+        * clarity.  Note that writeable implies not hung up, so if POLLHUP
+        * is set here then (POLLOUT | POLLWRNORM) is not set above, as
+        * standards require.  Less obviously, if POLLHUP was set above for
+        * a reader, then the output flags cannot have been set above for
+        * a writer.  Even less obviously, we cannot end up with both
+        * POLLHUP output flags set in revents after ORing the revents for
+        * the read and write socket in fifo_poll().
+        */
+       if (so->so_snd.sb_state & SBS_CANTSENDMORE)
+               revents |= POLLHUP;
+
+
        if (events & (POLLPRI | POLLRDBAND))
                if (so->so_oobmark || (so->so_rcv.sb_state & SBS_RCVATMARK))
                        revents |= events & (POLLPRI | POLLRDBAND);
diff --git a/sys/sys/poll.h b/sys/sys/poll.h
index c955f32..cfd5f01 100644
--- a/sys/sys/poll.h
+++ b/sys/sys/poll.h
@@ -71,6 +71,10 @@ struct pollfd {
 #define        POLLINIGNEOF    0x2000          /* like POLLIN, except ignore 
EOF */
 #endif
 
+#ifdef _KERNEL
+#define        POLLPOLL        0x8000          /* system call is actually 
poll() */
+#endif
+
 /*
  * These events are set if they occur regardless of whether they were
  * requested.
diff --git a/sys/sys/sockbuf.h b/sys/sys/sockbuf.h
index b8e6699..0da4fa1 100644
--- a/sys/sys/sockbuf.h
+++ b/sys/sys/sockbuf.h
@@ -56,6 +56,7 @@
 #define        SBS_CANTSENDMORE        0x0010  /* can't send more data to peer 
*/
 #define        SBS_CANTRCVMORE         0x0020  /* can't receive more data from 
peer */
 #define        SBS_RCVATMARK           0x0040  /* at mark on input */
+#define        SBS_COULDRCV            0x0080  /* could receive previously (or 
now) */
 
 struct mbuf;
 struct sockaddr;

Attachment: pgpReKbHxIfLR.pgp
Description: PGP signature

Reply via email to