On Wed, Jun 03, 2009 at 05:30:51PM +0300, Kostik Belousov wrote: > On Wed, Jun 03, 2009 at 04:10:34PM +0300, Vlad Galu wrote: > > Hm, I was having an issue with an internal piece of software, but > > never checked what kind of pipe caused the problem. Turns out it was a > > FIFO, and I got bitten by the same bug described here: > > http://lists.freebsd.org/pipermail/freebsd-bugs/2006-March/017591.html > > > > The problem is that the reader process isn't notified when the writer > > process exits or closes the FIFO fd... > > So you did found the relevant PR with long audit trail and patches > attached. You obviously should contact the author of the patches, > Oliver Fromme, who is FreeBSD committer for some time (CCed). > > I agree that the thing shall be fixed finally. Skimming over the > patches in kern/94772, I have some doubts about removal of > POLLINIGNEOF flag. The reason is that we are generally do not > remove exposed user interfaces.
I forward-ported Bruce' patch to the CURRENT. It passes the tests from tools/regression/fifo and a test from kern/94772. For my liking, I did not removed POLLINIGNEOF. diff --git a/sys/fs/fifofs/fifo_vnops.c b/sys/fs/fifofs/fifo_vnops.c index 66963bc..7e279ca 100644 --- a/sys/fs/fifofs/fifo_vnops.c +++ b/sys/fs/fifofs/fifo_vnops.c @@ -226,11 +226,47 @@ fail1: if (ap->a_mode & FREAD) { fip->fi_readers++; if (fip->fi_readers == 1) { + SOCKBUF_LOCK(&fip->fi_readsock->so_rcv); + if (fip->fi_writers > 0) + fip->fi_readsock->so_rcv.sb_state |= + SBS_COULDRCV; + else + /* + * Sloppy? Might be necessary to clear it + * in all the places where fi_readers is + * decremented to 0. I think only writers + * polling for input could be confused by + * having it not set, and there is a problem + * with these anyway now that we have + * reversed the sense of the flag -- they + * now block (?), but shouldn't. + */ + fip->fi_readsock->so_rcv.sb_state &= + ~SBS_COULDRCV; + SOCKBUF_UNLOCK(&fip->fi_readsock->so_rcv); SOCKBUF_LOCK(&fip->fi_writesock->so_snd); fip->fi_writesock->so_snd.sb_state &= ~SBS_CANTSENDMORE; SOCKBUF_UNLOCK(&fip->fi_writesock->so_snd); if (fip->fi_writers > 0) { wakeup(&fip->fi_writers); + SOCKBUF_LOCK(&fip->fi_readsock->so_rcv); + if (fip->fi_writers > 0) + fip->fi_readsock->so_rcv.sb_state |= + SBS_COULDRCV; + else + /* + * Sloppy? Might be necessary to clear it + * in all the places where fi_readers is + * decremented to 0. I think only writers + * polling for input could be confused by + * having it not set, and there is a problem + * with these anyway now that we have + * reversed the sense of the flag -- they + * now block (?), but shouldn't. + */ + fip->fi_readsock->so_rcv.sb_state &= + ~SBS_COULDRCV; + SOCKBUF_UNLOCK(&fip->fi_readsock->so_rcv); sowwakeup(fip->fi_writesock); } } @@ -244,6 +280,7 @@ fail1: if (fip->fi_writers == 1) { SOCKBUF_LOCK(&fip->fi_readsock->so_rcv); fip->fi_readsock->so_rcv.sb_state &= ~SBS_CANTRCVMORE; + fip->fi_readsock->so_rcv.sb_state |= SBS_COULDRCV; SOCKBUF_UNLOCK(&fip->fi_readsock->so_rcv); if (fip->fi_readers > 0) { wakeup(&fip->fi_readers); @@ -672,28 +709,10 @@ fifo_poll_f(struct file *fp, int events, struct ucred *cred, struct thread *td) levents = events & (POLLIN | POLLINIGNEOF | POLLPRI | POLLRDNORM | POLLRDBAND); if ((fp->f_flag & FREAD) && levents) { - /* - * If POLLIN or POLLRDNORM is requested and POLLINIGNEOF is - * not, then convert the first two to the last one. This - * tells the socket poll function to ignore EOF so that we - * block if there is no writer (and no data). Callers can - * set POLLINIGNEOF to get non-blocking behavior. - */ - if (levents & (POLLIN | POLLRDNORM) && - !(levents & POLLINIGNEOF)) { - levents &= ~(POLLIN | POLLRDNORM); - levents |= POLLINIGNEOF; - } - filetmp.f_data = fip->fi_readsock; filetmp.f_cred = cred; - revents |= soo_poll(&filetmp, levents, cred, td); - - /* Reverse the above conversion. */ - if ((revents & POLLINIGNEOF) && !(events & POLLINIGNEOF)) { - revents |= (events & (POLLIN | POLLRDNORM)); - revents &= ~POLLINIGNEOF; - } + revents |= soo_poll(&filetmp, levents | (events & POLLPOLL), + cred, td); } levents = events & (POLLOUT | POLLWRNORM | POLLWRBAND); if ((fp->f_flag & FWRITE) && levents) { diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c index 616c5b7..98ccc9e 100644 --- a/sys/kern/sys_generic.c +++ b/sys/kern/sys_generic.c @@ -1226,8 +1226,8 @@ pollscan(td, fds, nfd) * POLLERR if appropriate. */ selfdalloc(td, fds); - fds->revents = fo_poll(fp, fds->events, - td->td_ucred, td); + fds->revents = fo_poll(fp, + fds->events | POLLPOLL, td->td_ucred, td); if (fds->revents != 0) n++; } diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c index 7341d3f..a13d648 100644 --- a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c @@ -2706,6 +2706,42 @@ sopoll_generic(struct socket *so, int events, struct ucred *active_cred, if (sowriteable(so)) revents |= events & (POLLOUT | POLLWRNORM); + /* + * SBS_CANTRCVMORE (which is checked by soreadable()) normally + * implies EOF (and thus readable) and hung up, but for + * compatibility with other systems and to obtain behavior that + * is otherwise unavailable we make the case of poll() on a fifo + * that has never had any writers during the lifetime of any + * current reader special: then we pretend that the fifo is + * unreadable unless it contains non-null data, and that it is + * not hung up. The POLLPOLL flag is set by poll() to identify + * poll() here, and the SBS_COULDRCV flag is set by the fifo + * layer to indicate a fifo that is not in the special state. + */ + if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { + if (!(events & POLLPOLL) || so->so_rcv.sb_state & SBS_COULDRCV) + revents |= POLLHUP; /* finish settings */ + else if (!(so->so_rcv.sb_cc >= so->so_rcv.sb_lowat || + !TAILQ_EMPTY(&so->so_comp) || so->so_error)) + revents &= ~(POLLIN | POLLRDNORM); /* undo settings */ + } + + /* + * Testing of hangup for writers could be optimized by combining + * it with testing for writeability, but we keep the test separate + * and with the same organization as the test for readers for + * clarity. Note that writeable implies not hung up, so if POLLHUP + * is set here then (POLLOUT | POLLWRNORM) is not set above, as + * standards require. Less obviously, if POLLHUP was set above for + * a reader, then the output flags cannot have been set above for + * a writer. Even less obviously, we cannot end up with both + * POLLHUP output flags set in revents after ORing the revents for + * the read and write socket in fifo_poll(). + */ + if (so->so_snd.sb_state & SBS_CANTSENDMORE) + revents |= POLLHUP; + + if (events & (POLLPRI | POLLRDBAND)) if (so->so_oobmark || (so->so_rcv.sb_state & SBS_RCVATMARK)) revents |= events & (POLLPRI | POLLRDBAND); diff --git a/sys/sys/poll.h b/sys/sys/poll.h index c955f32..cfd5f01 100644 --- a/sys/sys/poll.h +++ b/sys/sys/poll.h @@ -71,6 +71,10 @@ struct pollfd { #define POLLINIGNEOF 0x2000 /* like POLLIN, except ignore EOF */ #endif +#ifdef _KERNEL +#define POLLPOLL 0x8000 /* system call is actually poll() */ +#endif + /* * These events are set if they occur regardless of whether they were * requested. diff --git a/sys/sys/sockbuf.h b/sys/sys/sockbuf.h index b8e6699..0da4fa1 100644 --- a/sys/sys/sockbuf.h +++ b/sys/sys/sockbuf.h @@ -56,6 +56,7 @@ #define SBS_CANTSENDMORE 0x0010 /* can't send more data to peer */ #define SBS_CANTRCVMORE 0x0020 /* can't receive more data from peer */ #define SBS_RCVATMARK 0x0040 /* at mark on input */ +#define SBS_COULDRCV 0x0080 /* could receive previously (or now) */ struct mbuf; struct sockaddr;
pgpReKbHxIfLR.pgp
Description: PGP signature