There was some discussion a few weeks ago about whether TCP_CORK was
broken on Linux 2.6.

I did some tests on Linux 2.6.12 and found that TCP_CORK *does* appear to work. However, it doesn't work if TCP_NODELAY is also set on the socket (as
the tcp(7) manual page warns).

I've attached my test program in case it's useful to others. It's run as sendfile_test [--cork] [--nodelay] [--nonblock] filename listener_port_number. It listens endlessly for connections on the specified port. When it gets a connection, it write(2)s a short HTTP response header and then calls sendfile to transmit the entire contents of the file. The optional arguments tell it whether to set TCP_CORK,
TCP_NODELAY, and/or O_NONBLOCK on the accepted connection; the default
is not to set any of these.

Looking at packet traces, the pattern I see is:

* TCP_NODELAY not set, TCP_CORK not set: the HTTP response header
arrives in a packet all by itself, followed by the start of the file in the next packet.

* TCP_NODELAY not set, TCP_CORK set: the first packet of the response contains the HTTP response header and the first part of the file. The packet is filled with data up to the MSS, unless the total header+file content is smaller than the MSS.

* TCP_NODELAY set, TCP_CORK set: the HTTP response header
arrives in a packet all by itself, followed by the start of the file in the next packet.

* TCP_NODELAY set, TCP_CORK not set: the HTTP response header
arrives in a packet all by itself, followed by the start of the file in the next packet.

I tested with both blocking and nonblocking sockets, and both modes yielded
these same results.

httpd-2.x always sets TCP_NODELAY, so it makes sense that people are
observing it sending the response header in a separate packet under Linux
2.6.  (I'm guessing that TCP_CORK works even in the presence of
TCP_NODELAY under 2.4, but I don't have a 2.4 system to test.)

Rather than automatically setting TCP_NODELAY in core_pre_connection(),
perhaps we should set it conditionally in core_output_filter(), where we have
enough information to tell whether it's needed.

Brian



#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/errno.h>
#include <sys/fcntl.h>
#include <sys/poll.h>
#include <sys/sendfile.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <netinet/in.h>
#include <netinet/tcp.h>

/* Prints the command-line synopsis to stderr and exits. */
static void usage();
/* Writes `size` bytes of `buf` to `connection`; returns -1 on a write error. */
static int write_all(int connection, const char *buf, size_t size);
/* Sends `file_size` bytes of the file `fd` to `connection` via sendfile(). */
static int sendfile_all(int connection, int fd, off_t file_size);

/* Not referenced by the code in this listing; main() takes the port from argv. */
#define LISTENER_PORT 8080
/* Minimal HTTP response header that main() writes ahead of the file body. */
static const char response_header[] = "HTTP/1.0 200 OK\r\n\r\n";
/*
 * sendfile_test [--nodelay] [--cork] [--nonblock] filename portnum
 *
 * Listens on `portnum` and, for every accepted connection, writes a short
 * HTTP response header with write(2) followed by the whole of `filename`
 * with sendfile(2).  The optional flags select which of TCP_NODELAY,
 * TCP_CORK and O_NONBLOCK are applied to each accepted socket; none is set
 * by default.
 *
 * The accept loop runs until accept() or fcntl() fails.
 */
int
main(int argc, char **argv)
{
  long port_num;
  char *port_end;
  struct sockaddr_in addr;
  int listener;
  int fd;
  struct stat file_info;
  int enable_tcp_nodelay = 0;
  int enable_tcp_cork = 0;
  int enable_nonblocking = 0;
  int i;

  if (argc < 3) {
    usage();
  }
  /* Everything before the last two arguments must be a recognised option. */
  for (i = 1; i < argc - 2; i++) {
    if (!strcmp(argv[i], "--nodelay")) {
      enable_tcp_nodelay = 1;
    }
    else if (!strcmp(argv[i], "--cork")) {
      enable_tcp_cork = 1;
    }
    else if (!strcmp(argv[i], "--nonblock")) {
      enable_nonblocking = 1;
    }
    else {
      usage();
    }
  }

  /* After the loop argv[i] is the filename and argv[i + 1] the port. */
  fd = open(argv[i], O_RDONLY);
  if (fd == -1) {
    perror("open");
    exit(1);
  }
  if (fstat(fd, &file_info) == -1) {
    perror("fstat");
    exit(1);
  }

  /* strtol instead of atoi so that garbage is rejected rather than
   * silently turned into port 0. */
  errno = 0;
  port_num = strtol(argv[i + 1], &port_end, 10);
  if (errno != 0 || port_end == argv[i + 1] || *port_end != '\0'
      || port_num < 1 || port_num > 65535) {
    fprintf(stderr, "invalid port number: %s\n", argv[i + 1]);
    usage();
  }

  /* Protocol 0 lets the kernel pick the default (TCP) for SOCK_STREAM. */
  listener = socket(AF_INET, SOCK_STREAM, 0);
  if (listener == -1) {
    perror("socket");
    exit(1);
  }
  /* Zero the whole structure so no uninitialised bytes reach bind(). */
  memset(&addr, 0, sizeof(addr));
  addr.sin_family = AF_INET;
  addr.sin_port = htons((unsigned short)port_num);
  addr.sin_addr.s_addr = htonl(INADDR_ANY);
  if (bind(listener, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
    perror("bind");
    exit(1);
  }
  if (listen(listener, 1024) == -1) {
    perror("listen");
    exit(1);
  }

  for (;;) {
    struct sockaddr_in new_addr;
    socklen_t addr_length = sizeof(new_addr);
    int connection = accept(listener, (struct sockaddr *)&new_addr,
                            &addr_length);
    if (connection == -1) {
      perror("accept");
      break;
    }
    if (enable_nonblocking) {
      int flags = fcntl(connection, F_GETFL, 0);
      if (flags == -1) {
        perror("fcntl(F_GETFL)");
        close(connection);
        break;
      }
      flags |= O_NONBLOCK;
      if (fcntl(connection, F_SETFL, flags) == -1) {
        perror("fcntl(F_SETFL)");
        close(connection);
        break;
      }
    }
    /* Socket option failures are reported but do not abort the response,
     * so the packet trace still shows what the kernel did. */
    if (enable_tcp_nodelay) {
      int flag = 1;
      if (setsockopt(connection, IPPROTO_TCP, TCP_NODELAY, &flag,
                     sizeof(flag)) == -1) {
        perror("setsockopt(TCP_NODELAY)");
      }
    }
    if (enable_tcp_cork) {
      int flag = 1;
      if (setsockopt(connection, IPPROTO_TCP, TCP_CORK, &flag,
                     sizeof(flag)) == -1) {
        perror("setsockopt(TCP_CORK)");
      }
    }
    /* sizeof - 1: do not send the string's terminating NUL. */
    if (write_all(connection, response_header, sizeof(response_header) - 1)
        == -1) {
      perror("write");
    }
    else if (sendfile_all(connection, fd, file_info.st_size) == -1) {
      perror("sendfile");
    }
    close(connection);
  }

  close(listener);
  close(fd);
  return 0;
}

/*
 * Print the command-line synopsis to stderr and terminate the process
 * with exit status 2.  Never returns.
 */
static void
usage(void)
{
  /* The literal is split with string concatenation, not a raw newline:
   * a line break inside a string literal does not compile. */
  fprintf(stderr,
          "usage: sendfile_test [--nodelay] [--cork] [--nonblock] "
          "filename portnum\n");
  exit(2);
}

/*
 * Write all `size` bytes of `data` to `connection`, looping over short
 * writes.
 *
 * On a nonblocking descriptor, EAGAIN/EWOULDBLOCK is handled by waiting in
 * poll() until the descriptor is writable again.  Calls interrupted by a
 * signal (EINTR) are retried.
 *
 * Returns the number of bytes written (== size) on success, or -1 with
 * errno set by the failing write() or poll() call.
 */
static int
write_all(int connection, const char *data, size_t size)
{
  size_t offset = 0;
  while (offset < size) {
    /* write() returns ssize_t; keep the full width until it is checked. */
    ssize_t rv = write(connection, data + offset, size - offset);
    if (rv == -1) {
      if (errno == EINTR) {
        continue;
      }
      if (errno == EAGAIN || errno == EWOULDBLOCK) {
        struct pollfd poll_fd;
        poll_fd.fd = connection;
        /* POLLERR/POLLHUP/POLLNVAL are always reported in revents and
         * need not be requested; the retried write() surfaces them. */
        poll_fd.events = POLLOUT;
        poll_fd.revents = 0;
        if (poll(&poll_fd, 1, -1) == -1 && errno != EINTR) {
          /* Report failure instead of a partial count the caller would
           * mistake for success. */
          return -1;
        }
        continue;
      }
      return -1;
    }
    offset += (size_t)rv;
  }
  return (int)offset;
}

/*
 * Send the first `file_size` bytes of the file `fd` to `connection` using
 * sendfile(2), looping over partial sends.
 *
 * On a nonblocking socket, EAGAIN/EWOULDBLOCK is handled by waiting in
 * poll() until the socket is writable again.  Calls interrupted by a
 * signal (EINTR) are retried.
 *
 * Returns the number of bytes sent on success, or -1 with errno set by the
 * failing sendfile() or poll() call.  The count is smaller than file_size
 * only if the file turned out to be shorter than file_size.
 */
static int
sendfile_all(int connection, int fd, off_t file_size)
{
  off_t offset = 0;
  while (offset < file_size) {
    ssize_t rv = sendfile(connection, fd, &offset,
                          (size_t)(file_size - offset));
    if (rv == -1) {
      if (errno == EINTR) {
        continue;
      }
      if (errno == EAGAIN || errno == EWOULDBLOCK) {
        struct pollfd poll_fd;
        poll_fd.fd = connection;
        poll_fd.events = POLLOUT;
        poll_fd.revents = 0;
        if (poll(&poll_fd, 1, -1) == -1 && errno != EINTR) {
          /* Report failure instead of a partial count the caller would
           * mistake for success. */
          return -1;
        }
        continue;
      }
      return -1;
    }
    if (rv == 0) {
      /* End of file before file_size bytes: the file is shorter than
       * expected.  Stop rather than spin forever. */
      break;
    }
    /* No "offset += rv" here: sendfile() has already advanced `offset`
     * through the pointer, so adding rv again would skip file data. */
  }
  return (int)offset;
}


Reply via email to