On Sat, Jan 10, 2015 at 03:25:42AM +0100, Andres Freund wrote:
> 0001-Allow-latches-to-wait-for-socket-writability-without.patch
>      Imo pretty close to commit and can be committed independently.

The key open question is whether all platforms of interest can reliably detect
end-of-file when poll()ing or select()ing for write only.  Older GNU/Linux
select() cannot; see attached test program.  We use poll() there anyway, so
the bug in that configuration does not affect PostgreSQL.  Is it a bellwether
of similar bugs in other implementations, bugs that will affect PostgreSQL?

> This previously had explicitly been forbidden in e42a21b9e6c9, as
> there was no use case at that point. We now are looking into making
> FE/BE communication use latches, so it

Truncated sentence.

> +                     if (pfds[0].revents & (POLLHUP | POLLERR | POLLNVAL))
> +                     {
> +                             /* EOF/error condition */
> +                             if (wakeEvents & WL_SOCKET_READABLE)
> +                                     result |= WL_SOCKET_READABLE;
> +                             if (wakeEvents & WL_SOCKET_WRITEABLE)
> +                                     result |= WL_SOCKET_WRITEABLE;
> +                     }

With some poll() implementations (e.g. OS X), this can wrongly report
WL_SOCKET_WRITEABLE if the peer used shutdown(SHUT_WR).  I tentatively think
that's acceptable.  libpq does not use shutdown(), and other client interfaces
would do so at their own risk.  Should we worry about hostile clients creating
a denial-of-service by causing a server send() to block unexpectedly?
Probably not; a user able to send arbitrary TCP traffic to the postmaster port
can already achieve that.

> +                     if (resEvents.lNetworkEvents & FD_CLOSE)
> +                     {
> +                             if (wakeEvents & WL_SOCKET_READABLE)
> +                                     result |= WL_SOCKET_READABLE;
> +                             if (wakeEvents & WL_SOCKET_WRITEABLE)
> +                                     result |= WL_SOCKET_WRITEABLE;
> +                     }
> +
>               }

Extra blank line.
/*
 * Test whether select() can report write-ready on a peer-closed TCP socket when
 * the send buffer is full.  Though write() won't block, some GNU/Linux systems
 * fail to report write-ready.  RHEL 6.6 (kernel-2.6.32-431.23.3.el6.x86_64,
 * glibc-2.12-1.149.el6.x86_64) has the bug.  RHEL 7.0
 * (kernel-3.10.0-123.8.1.el7.x86_64, glibc-2.17-55.el7_0.3.x86_64) does not.
 */

#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <netinet/in.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/types.h>
#include <unistd.h>

/*
 * Check a syscall return value.
 *
 * A negative result is treated as failure: the current errno is reported via
 * perror() and the process exits with EXIT_FAILURE.  Any non-negative result
 * simply returns to the caller.
 */
void
CSYS(int res)
{
    if (res >= 0)
        return;

    perror("some syscall");
    exit(EXIT_FAILURE);
}

/*
 * Like socketpair(), but use a loopback TCP connection.
 *
 * On return, fd[0] is the accepted (listener-side) end and fd[1] is the
 * connecting end; both are in blocking mode.  Any syscall failure terminates
 * the program.
 */
static void tcppair(int fd[2])
{
    /* Zero the whole address so padding (sin_zero, BSD sin_len) is defined. */
    struct sockaddr_in addr = {0};
    socklen_t addrlen = sizeof(addr);
    int srv;
    int one = 1;
    int flags;

    addr.sin_family = AF_INET;
    /* Port 0: let the kernel pick a free port rather than risk a collision. */
    addr.sin_port = htons(0);
    addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);

    CSYS(srv = socket(AF_INET, SOCK_STREAM, 0));
    CSYS(setsockopt(srv, SOL_SOCKET, SO_REUSEADDR,
                    (char *) &one, sizeof(one)));
    CSYS(bind(srv, (struct sockaddr *) &addr, sizeof(addr)));
    /* Learn which port was assigned so the client end can connect to it. */
    CSYS(getsockname(srv, (struct sockaddr *) &addr, &addrlen));
    CSYS(listen(srv, 8));

    /*
     * Start the connection in non-blocking mode so that this single thread
     * can go on to accept() it; restore the original flags afterwards.
     */
    CSYS(fd[1] = socket(AF_INET, SOCK_STREAM, 0));
    CSYS(flags = fcntl(fd[1], F_GETFL));
    CSYS(fcntl(fd[1], F_SETFL, flags | O_NONBLOCK));
    /*
     * EINPROGRESS is the expected outcome, but a loopback connect() may also
     * complete immediately; only a genuine failure is fatal.
     */
    if (connect(fd[1], (struct sockaddr *) &addr, sizeof(addr)) < 0 &&
        errno != EINPROGRESS)
    {
        perror("connect");
        exit(EXIT_FAILURE);
    }
    CSYS(fcntl(fd[1], F_SETFL, flags));

    CSYS(fd[0] = accept(srv, NULL, NULL));
    CSYS(close(srv));
}

/*
 * Does select() consider the fd ready for writing?
 *
 * Waits up to one second, watching fd in the write set only.  Returns nonzero
 * if select() left fd marked in that set, zero otherwise.  A select() failure
 * is handed to CSYS().
 */
int
select_write(int fd)
{
    struct timeval wait_limit;
    fd_set wset;
    int ready;

    wait_limit.tv_usec = 0;
    wait_limit.tv_sec = 1;

    FD_ZERO(&wset);
    FD_SET(fd, &wset);

    CSYS(select(fd + 1, NULL, &wset, NULL, &wait_limit));

    ready = FD_ISSET(fd, &wset);
    return ready;
}

/*
 * Write to a socket until writes would block.
 *
 * The fd is switched to non-blocking mode for the duration of the call and
 * its original file status flags are restored before returning.  Each pass
 * prints the number of bytes accepted; a write() failure other than
 * EAGAIN/EWOULDBLOCK is reported with perror() but is not fatal.
 */
void
fill(int fd)
{
    /* Static storage is zero-filled, so no indeterminate bytes are sent. */
    static const char buf[32 * PIPE_BUF];
    int flags;
    int total;

    CSYS(flags = fcntl(fd, F_GETFL));
    CSYS(fcntl(fd, F_SETFL, flags | O_NONBLOCK));

    /*
     * On both Linux and OS X, select() can report the socket as write-ready
     * immediately after a write() reported EAGAIN.  Loop until both sources
     * agree that the socket is out of storage.
     */
    do
    {
        ssize_t n;

        total = 0;
        while ((n = write(fd, buf, sizeof(buf))) > 0)
            total += (int) n;

        /* errno is only meaningful when write() actually reported failure. */
        if (n < 0 && errno != EAGAIN && errno != EWOULDBLOCK)
            perror("write");

        printf("wrote %d bytes\n", total);
    } while (total > 0 && select_write(fd));

    CSYS(fcntl(fd, F_SETFL, flags));
}

/*
 * Create a loopback TCP pair, then print select()'s write-readiness verdict
 * for the accepted end at three points: before the peer is closed, after the
 * peer is closed, and after fill() has written to the socket.
 */
int
main(int argc, char **argv)
{
    int pair[2];
    const char *neg;

    /* Ignore SIGPIPE so that it cannot terminate the process. */
    signal(SIGPIPE, SIG_IGN);
    tcppair(pair);

    neg = select_write(pair[0]) ? "" : " NOT";
    printf("before close:%s writable\n", neg);

    CSYS(close(pair[1]));

    neg = select_write(pair[0]) ? "" : " NOT";
    printf("before fill:%s writable\n", neg);

    fill(pair[0]);

    neg = select_write(pair[0]) ? "" : " NOT";
    printf("end:%s writable\n", neg);

    return 0;
}
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to