On Sat, Jan 10, 2015 at 03:25:42AM +0100, Andres Freund wrote:
> 0001-Allow-latches-to-wait-for-socket-writability-without.patch
> Imo pretty close to commit and can be committed independently.
The key open question is whether all platforms of interest can reliably detect
end-of-file when poll()ing or select()ing for write only. Older GNU/Linux
select() cannot; see attached test program. We use poll() there anyway, so
the bug in that configuration does not affect PostgreSQL. Is it a bellwether
of similar bugs in other implementations, bugs that will affect PostgreSQL?
> This previously had explicitly been forbidden in e42a21b9e6c9, as
> there was no use case at that point. We now are looking into making
> FE/BE communication use latches, so it
Truncated sentence.
> + if (pfds[0].revents & (POLLHUP | POLLERR | POLLNVAL))
> + {
> + /* EOF/error condition */
> + if (wakeEvents & WL_SOCKET_READABLE)
> + result |= WL_SOCKET_READABLE;
> + if (wakeEvents & WL_SOCKET_WRITEABLE)
> + result |= WL_SOCKET_WRITEABLE;
> + }
With some poll() implementations (e.g. OS X), this can wrongly report
WL_SOCKET_WRITEABLE if the peer used shutdown(SHUT_WR). I tentatively think
that's acceptable. libpq does not use shutdown(), and other client interfaces
would do so at their own risk. Should we worry about hostile clients creating
a denial-of-service by causing a server send() to block unexpectedly?
Probably not; a user able to send arbitrary TCP traffic to the postmaster port
can already achieve that.
> + if (resEvents.lNetworkEvents & FD_CLOSE)
> + {
> + if (wakeEvents & WL_SOCKET_READABLE)
> + result |= WL_SOCKET_READABLE;
> + if (wakeEvents & WL_SOCKET_WRITEABLE)
> + result |= WL_SOCKET_WRITEABLE;
> + }
> +
> }
Extra blank line.
/*
* Test whether select() can report write-ready on a peer-closed TCP socket when
* the send buffer is full. Though write() won't block, some GNU/Linux systems
* fail to report write-ready. RHEL 6.6 (kernel-2.6.32-431.23.3.el6.x86_64,
* glibc-2.12-1.149.el6.x86_64) has the bug. RHEL 7.0
* (kernel-3.10.0-123.8.1.el7.x86_64, glibc-2.17-55.el7_0.3.x86_64) does not.
*/
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <netinet/in.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/types.h>
#include <unistd.h>
/*
 * Check a syscall return value.
 *
 * res is the value a system call returned.  A negative value is treated as
 * failure: perror() reports the current errno and the process exits with
 * EXIT_FAILURE.  Any non-negative value returns to the caller normally.
 */
void
CSYS(int res)
{
    if (res >= 0)
        return;

    perror("some syscall");
    exit(EXIT_FAILURE);
}
/*
 * Like socketpair(), but use a loopback TCP connection.
 *
 * On return, fd[0] is the accepted (listening-side) end and fd[1] is the
 * connecting end; both are left with fd[1]'s original file status flags
 * restored, i.e. the temporary O_NONBLOCK used during connect() is removed.
 * Any failing system call terminates the program.
 */
static void
tcppair(int fd[2])
{
    /* Zero the whole address so padding fields never carry stack garbage. */
    struct sockaddr_in addr = {0};
    int         srv;
    int         one = 1;
    int         flags;

    addr.sin_family = AF_INET;
    addr.sin_port = htons(17531);
    addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);

    /* Listening socket on the loopback interface. */
    CSYS(srv = socket(AF_INET, SOCK_STREAM, 0));
    CSYS(setsockopt(srv, SOL_SOCKET, SO_REUSEADDR,
                    (char *) &one, sizeof(one)));
    CSYS(bind(srv, (struct sockaddr *) &addr, sizeof(addr)));
    CSYS(listen(srv, 8));

    /*
     * Start the client connection in non-blocking mode so connect() returns
     * without waiting; accept() below then picks up the pending connection.
     */
    CSYS(fd[1] = socket(AF_INET, SOCK_STREAM, 0));
    CSYS(flags = fcntl(fd[1], F_GETFL));
    CSYS(fcntl(fd[1], F_SETFL, flags | O_NONBLOCK));

    /*
     * A non-blocking connect() normally fails with EINPROGRESS, but it is
     * also allowed to complete immediately and return 0.  Only a failure
     * with some other errno is a real error.
     */
    if (connect(fd[1], (struct sockaddr *) &addr, sizeof(addr)) < 0 &&
        errno != EINPROGRESS)
    {
        perror("connect");
        exit(EXIT_FAILURE);
    }

    /* Back to the original (blocking) mode for the rest of the test. */
    CSYS(fcntl(fd[1], F_SETFL, flags));
    CSYS(fd[0] = accept(srv, NULL, NULL));
    CSYS(close(srv));
}
/*
 * Does select() consider the fd ready for writing?
 *
 * Waits up to one second with fd as the only member of the write set and
 * returns the FD_ISSET() result for fd afterwards: nonzero if select()
 * flagged it as writable, zero otherwise.  A select() failure terminates
 * the program via CSYS().
 */
int
select_write(int fd)
{
    struct timeval timeout;
    fd_set      wfds;
    int         ready;

    /* One-second upper bound on the wait. */
    timeout.tv_usec = 0;
    timeout.tv_sec = 1;

    FD_ZERO(&wfds);
    FD_SET(fd, &wfds);

    CSYS(select(fd + 1, NULL, &wfds, NULL, &timeout));

    ready = FD_ISSET(fd, &wfds);
    return ready;
}
/*
 * Write to a socket until writes would block.
 *
 * fd is switched to non-blocking mode for the duration of the call and its
 * original file status flags are restored before returning.  After each
 * burst of writes the number of bytes accepted is printed to stdout; a
 * write() failure other than "would block" is reported with perror().
 */
void
fill(int fd)
{
    /* Zero-filled so that no indeterminate stack bytes are sent. */
    char        buf[32 * PIPE_BUF] = {0};
    int         flags;
    int         total;

    CSYS(flags = fcntl(fd, F_GETFL));
    CSYS(fcntl(fd, F_SETFL, flags | O_NONBLOCK));

    /*
     * On both Linux and OS X, select() can report the socket as write-ready
     * immediately after a write() reported EAGAIN.  Loop until both sources
     * agree that the socket is out of storage.
     */
    do
    {
        ssize_t     n;

        total = 0;
        while ((n = write(fd, buf, sizeof(buf))) > 0)
            total += (int) n;

        /*
         * errno is meaningful only when write() actually failed.  EAGAIN or
         * EWOULDBLOCK is the expected "buffer full" outcome; anything else
         * is worth showing to the user.
         */
        if (n < 0 && errno != EAGAIN && errno != EWOULDBLOCK)
            perror("write");
        printf("wrote %d bytes\n", total);
    } while (total > 0 && select_write(fd));

    CSYS(fcntl(fd, F_SETFL, flags));
}
/*
 * Test driver.
 *
 * Builds a loopback TCP pair, then prints whether select() reports fd[0] as
 * writable at three points: before the peer end fd[1] is closed, after it is
 * closed but before the send buffer is filled, and after fill() has written
 * until writes would block.  SIGPIPE is ignored so that writing to the
 * closed connection yields an error return instead of killing the process.
 */
int
main(int argc, char **argv)
{
    int         fd[2];
    int         writable;

    signal(SIGPIPE, SIG_IGN);
    tcppair(fd);

    writable = select_write(fd[0]);
    printf("before close:%s writable\n", writable ? "" : " NOT");

    CSYS(close(fd[1]));

    writable = select_write(fd[0]);
    printf("before fill:%s writable\n", writable ? "" : " NOT");

    fill(fd[0]);

    writable = select_write(fd[0]);
    printf("end:%s writable\n", writable ? "" : " NOT");

    return 0;
}
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers