From: Craig Gallek <kr...@google.com>

This program will build classic and extended BPF programs and
validate the socket selection logic when used with
SO_ATTACH_REUSEPORT_CBPF and SO_ATTACH_REUSEPORT_EBPF.

It also validates the re-programming flow and several edge cases.

Signed-off-by: Craig Gallek <kr...@google.com>
---
 tools/testing/selftests/net/.gitignore      |   1 +
 tools/testing/selftests/net/Makefile        |   2 +-
 tools/testing/selftests/net/reuseport_bpf.c | 467 ++++++++++++++++++++++++++++
 3 files changed, 469 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/net/reuseport_bpf.c

diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 0032662..6fb2336 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -1,3 +1,4 @@
 socket
 psock_fanout
 psock_tpacket
+reuseport_bpf
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index fac4782..41449b5 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -4,7 +4,7 @@ CFLAGS = -Wall -O2 -g
 
 CFLAGS += -I../../../../usr/include/
 
-NET_PROGS = socket psock_fanout psock_tpacket
+NET_PROGS = socket psock_fanout psock_tpacket reuseport_bpf
 
 all: $(NET_PROGS)
 %: %.c
diff --git a/tools/testing/selftests/net/reuseport_bpf.c b/tools/testing/selftests/net/reuseport_bpf.c
new file mode 100644
index 0000000..74ff099
--- /dev/null
+++ b/tools/testing/selftests/net/reuseport_bpf.c
@@ -0,0 +1,467 @@
+/*
+ * Test functionality of BPF filters for SO_REUSEPORT.  The tests below will use
+ * a BPF program (both classic and extended) to read the first word from an
+ * incoming packet (expected to be in network byte-order), calculate a modulus
+ * of that number, and then dispatch the packet to the Nth socket using the
+ * result.  These tests are run for each supported address family and protocol.
+ * Additionally, a few edge cases in the implementation are tested.
+ */
+
+#include <errno.h>
+#include <error.h>
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <linux/unistd.h>
+#include <netinet/in.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/epoll.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <unistd.h>
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) /* element count */
+#endif
+
+struct test_params {
+       int recv_family;        /* address family of the receive sockets */
+       int send_family;        /* address family of the sending socket */
+       int protocol;           /* socket type given to socket() */
+       size_t recv_socks;      /* number of sockets in the receive group */
+       uint16_t recv_port;     /* port the receive group binds to */
+       uint16_t send_port_min; /* first source port used by senders */
+};
+
+static size_t sockaddr_size(void)      /* addrlen for bind()/connect() */
+{
+       return sizeof(struct sockaddr_storage);
+}
+
+/* Heap-allocate a zeroed wildcard address for family/port; caller frees. */
+static struct sockaddr *new_any_sockaddr(int family, uint16_t port)
+{
+       /* sockaddr_storage is large enough for both supported families. */
+       struct sockaddr_storage *addr = calloc(1, sizeof(*addr));
+       struct sockaddr_in *addr4 = (struct sockaddr_in *)addr;
+       struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)addr;
+
+       if (!addr)
+               error(1, errno, "failed to allocate sockaddr");
+
+       switch (family) {
+       case AF_INET:
+               addr4->sin_family = AF_INET;
+               addr4->sin_addr.s_addr = htonl(INADDR_ANY);
+               addr4->sin_port = htons(port);
+               break;
+       case AF_INET6:
+               addr6->sin6_family = AF_INET6;
+               addr6->sin6_addr = in6addr_any;
+               addr6->sin6_port = htons(port);
+               break;
+       default:
+               error(1, 0, "Unsupported family %d", family);
+       }
+       return (struct sockaddr *)addr;
+}
+
+/* Like new_any_sockaddr(), but pointing at the loopback address. */
+static struct sockaddr *new_loopback_sockaddr(int family, uint16_t port)
+{
+       struct sockaddr *sa = new_any_sockaddr(family, port);
+
+       if (family == AF_INET) {
+               struct sockaddr_in *in4 = (struct sockaddr_in *)sa;
+
+               in4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+       } else if (family == AF_INET6) {
+               struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)sa;
+
+               in6->sin6_addr = in6addr_loopback;
+       } else {
+               /* Not reached today: new_any_sockaddr() already exits. */
+               error(1, 0, "Unsupported family %d", family);
+       }
+
+       return sa;
+}
+
+/* Load an eBPF program returning skb[0] % mod and attach it to fd. */
+static void attach_ebpf(int fd, uint16_t mod)
+{
+       static char bpf_log_buf[65536];
+       static const char bpf_license[] = "GPL";
+       const struct bpf_insn prog[] = {
+               /* BPF_MOV64_REG(BPF_REG_6, BPF_REG_1) */
+               { BPF_ALU64 | BPF_MOV | BPF_X, BPF_REG_6, BPF_REG_1, 0, 0 },
+               /* BPF_LD_ABS(BPF_W, 0) R0 = (uint32_t)skb[0] */
+               { BPF_LD | BPF_ABS | BPF_W, 0, 0, 0, 0 },
+               /* BPF_ALU64_IMM(BPF_MOD, BPF_REG_0, mod) */
+               { BPF_ALU64 | BPF_MOD | BPF_K, BPF_REG_0, 0, 0, mod },
+               /* BPF_EXIT_INSN() */
+               { BPF_JMP | BPF_EXIT, 0, 0, 0, 0 }
+       };
+       union bpf_attr attr;
+       int bpf_fd;
+
+       memset(&attr, 0, sizeof(attr));
+       attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+       attr.insn_cnt = ARRAY_SIZE(prog);
+       attr.insns = (unsigned long)prog;       /* no 32-bit cast warning */
+       attr.license = (unsigned long)bpf_license;
+       attr.log_buf = (unsigned long)bpf_log_buf;
+       attr.log_size = sizeof(bpf_log_buf);
+       attr.log_level = 1;
+
+       bpf_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
+       if (bpf_fd < 0)
+               error(1, errno, "ebpf error. log:\n%s\n", bpf_log_buf);
+
+       if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &bpf_fd,
+                       sizeof(bpf_fd)))
+               error(1, errno, "failed to set SO_ATTACH_REUSEPORT_EBPF");
+       close(bpf_fd);  /* fd no longer needed once attached; avoid leak */
+}
+
+/* Attach a classic BPF program that returns skb[0] % mod to fd. */
+static void attach_cbpf(int fd, uint16_t mod)
+{
+       struct sock_filter insns[] = {
+               /* A = (uint32_t)skb[0] */
+               BPF_STMT(BPF_LD | BPF_W | BPF_ABS, 0),
+               /* A = A % mod */
+               BPF_STMT(BPF_ALU | BPF_MOD | BPF_K, mod),
+               /* return A */
+               BPF_STMT(BPF_RET | BPF_A, 0),
+       };
+       struct sock_fprog prog = { ARRAY_SIZE(insns), insns };
+
+       /* error() with a non-zero status exits, so failure never returns. */
+       if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_CBPF, &prog,
+                      sizeof(prog)) != 0)
+               error(1, errno, "failed to set SO_ATTACH_REUSEPORT_CBPF");
+}
+
+/* Fill fd[] with p.recv_socks SO_REUSEPORT sockets bound to p.recv_port. */
+static void build_recv_group(const struct test_params p, int fd[], uint16_t mod,
+                            void (*attach_bpf)(int, uint16_t))
+{
+       struct sockaddr *addr = new_any_sockaddr(p.recv_family, p.recv_port);
+       const int opt = 1;
+       size_t i;
+
+       for (i = 0; i < p.recv_socks; ++i) {
+               fd[i] = socket(p.recv_family, p.protocol, 0);
+               if (fd[i] < 0)
+                       error(1, errno, "failed to create recv %zu", i);
+
+               if (setsockopt(fd[i], SOL_SOCKET, SO_REUSEPORT, &opt,
+                              sizeof(opt)))
+                       error(1, errno, "failed to set SO_REUSEPORT on %zu", i);
+
+               if (i == 0)
+                       attach_bpf(fd[i], mod);
+
+               if (bind(fd[i], addr, sockaddr_size()))
+                       error(1, errno, "failed to bind recv socket %zu", i);
+
+               if (p.protocol == SOCK_STREAM &&
+                   listen(fd[i], p.recv_socks * 10))
+                       error(1, errno, "failed to listen on socket");
+       }
+       free(addr);
+}
+
+/* Send len bytes of buf to p.recv_port on loopback from local port sport. */
+static void send_from(struct test_params p, uint16_t sport, char *buf,
+                     size_t len)
+{
+       struct sockaddr *src = new_any_sockaddr(p.send_family, sport);
+       struct sockaddr *dst =
+               new_loopback_sockaddr(p.send_family, p.recv_port);
+       int sock = socket(p.send_family, p.protocol, 0);
+
+       if (sock < 0)
+               error(1, errno, "failed to create send socket");
+
+       if (bind(sock, src, sockaddr_size()) != 0)
+               error(1, errno, "failed to bind send socket");
+       if (connect(sock, dst, sockaddr_size()) != 0)
+               error(1, errno, "failed to connect");
+       if (send(sock, buf, len, 0) < 0)
+               error(1, errno, "failed to send message");
+
+       free(dst);
+       free(src);
+       close(sock);
+}
+
+/* Send 2 * p.recv_socks packets; each must arrive on socket sport % mod. */
+static void test_recv_order(const struct test_params p, int fd[], int mod)
+{
+       char recv_buf[8], send_buf[8];
+       struct iovec recv_io = { recv_buf, 8 };
+       struct msghdr msg = { .msg_iov = &recv_io, .msg_iovlen = 1 };
+       struct epoll_event ev;
+       int epfd, conn, i, sport, expected;
+       uint32_t data, ndata;
+
+       epfd = epoll_create(1);
+       if (epfd < 0)
+               error(1, errno, "failed to create epoll");
+       for (i = 0; i < p.recv_socks; ++i) {
+               ev.events = EPOLLIN;
+               ev.data.fd = fd[i];
+               if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd[i], &ev))
+                       error(1, errno, "failed to register sock %d epoll", i);
+       }
+
+       for (data = 0; data < p.recv_socks * 2; ++data) {
+               sport = p.send_port_min + data;
+               ndata = htonl(data);
+               memcpy(send_buf, &ndata, sizeof(ndata));
+               send_from(p, sport, send_buf, sizeof(ndata));
+
+               i = epoll_wait(epfd, &ev, 1, -1);
+               if (i < 0)
+                       error(1, errno, "epoll wait failed");
+
+               if (p.protocol == SOCK_STREAM) {
+                       conn = accept(ev.data.fd, NULL, NULL);
+                       if (conn < 0)
+                               error(1, errno, "error accepting");
+                       i = recvmsg(conn, &msg, 0);
+                       close(conn);
+               } else {
+                       i = recvmsg(ev.data.fd, &msg, 0);
+               }
+               if (i < 0)
+                       error(1, errno, "recvmsg error");
+               if (i != sizeof(ndata))
+                       error(1, 0, "expected size %zu got %d",
+                             sizeof(ndata), i);
+
+               /* Map the ready descriptor back to its index in fd[]. */
+               for (i = 0; i < p.recv_socks; ++i)
+                       if (ev.data.fd == fd[i])
+                               break;
+               memcpy(&ndata, recv_buf, sizeof(ndata));
+               fprintf(stderr, "Socket %d: %u\n", i, ntohl(ndata));
+
+               expected = (sport % mod);
+               if (i != expected)
+                       error(1, 0, "expected socket %d", expected);
+       }
+
+       close(epfd);
+}
+
+/* Check eBPF steering with mod recv_socks, then re-attach with half that. */
+static void test_reuseport_ebpf(const struct test_params p)
+{
+       int i, fd[p.recv_socks];
+
+       fprintf(stderr, "Testing EBPF mod %zu...\n", p.recv_socks);
+       build_recv_group(p, fd, p.recv_socks, attach_ebpf);
+       test_recv_order(p, fd, p.recv_socks);
+
+       fprintf(stderr, "Reprograming, testing mod %zu...\n", p.recv_socks / 2);
+       attach_ebpf(fd[0], p.recv_socks / 2);
+       test_recv_order(p, fd, p.recv_socks / 2);
+       for (i = 0; i < p.recv_socks; ++i)
+               close(fd[i]);
+}
+
+/* Check cBPF steering with mod recv_socks, then re-attach with half that. */
+static void test_reuseport_cbpf(const struct test_params p)
+{
+       int i, fd[p.recv_socks];
+
+       fprintf(stderr, "Testing CBPF mod %zu...\n", p.recv_socks);
+       build_recv_group(p, fd, p.recv_socks, attach_cbpf);
+       test_recv_order(p, fd, p.recv_socks);
+
+       fprintf(stderr, "Reprograming, testing mod %zu...\n", p.recv_socks / 2);
+       attach_cbpf(fd[0], p.recv_socks / 2);
+       test_recv_order(p, fd, p.recv_socks / 2);
+       for (i = 0; i < p.recv_socks; ++i)
+               close(fd[i]);
+}
+
+/* A second socket carrying its own program must be refused at bind time. */
+static void test_extra_filter(const struct test_params p)
+{
+       struct sockaddr *addr = new_any_sockaddr(p.recv_family, p.recv_port);
+       int fd1, fd2, opt = 1;
+
+       fprintf(stderr, "Testing too many filters...\n");
+       fd1 = socket(p.recv_family, p.protocol, 0);
+       if (fd1 < 0)
+               error(1, errno, "failed to create socket 1");
+       fd2 = socket(p.recv_family, p.protocol, 0);
+       if (fd2 < 0)
+               error(1, errno, "failed to create socket 2");
+
+       if (setsockopt(fd1, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)))
+               error(1, errno, "failed to set SO_REUSEPORT on socket 1");
+       if (setsockopt(fd2, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)))
+               error(1, errno, "failed to set SO_REUSEPORT on socket 2");
+
+       attach_ebpf(fd1, 10);
+       attach_ebpf(fd2, 10);
+
+       if (bind(fd1, addr, sockaddr_size()))
+               error(1, errno, "failed to bind recv socket 1");
+       if (!bind(fd2, addr, sockaddr_size()) || errno != EADDRINUSE)
+               error(1, errno, "bind socket 2 should fail with EADDRINUSE");
+
+       close(fd1);
+       close(fd2);
+       free(addr);
+}
+
+/* Reuseport programs must be rejected (EINVAL) without SO_REUSEPORT set. */
+static void test_filter_no_reuseport(const struct test_params p)
+{
+       struct sockaddr *addr = new_any_sockaddr(p.recv_family, p.recv_port);
+       const char bpf_license[] = "GPL";
+       struct bpf_insn ecode[] = {
+               { BPF_ALU64 | BPF_MOV | BPF_K, BPF_REG_0, 0, 0, 10 },
+               { BPF_JMP | BPF_EXIT, 0, 0, 0, 0 }
+       };
+       struct sock_filter ccode[] = {{ BPF_RET | BPF_A, 0, 0, 0 }};
+       union bpf_attr eprog;
+       struct sock_fprog cprog;
+       int fd, bpf_fd;
+
+       fprintf(stderr, "Testing filters on non-SO_REUSEPORT socket...\n");
+
+       memset(&eprog, 0, sizeof(eprog));
+       eprog.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+       eprog.insn_cnt = ARRAY_SIZE(ecode);
+       eprog.insns = (unsigned long)ecode;
+       eprog.license = (unsigned long)bpf_license;
+
+       memset(&cprog, 0, sizeof(cprog));
+       cprog.len = ARRAY_SIZE(ccode);
+       cprog.filter = ccode;
+
+       bpf_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &eprog, sizeof(eprog));
+       if (bpf_fd < 0)
+               error(1, errno, "ebpf error");
+       fd = socket(p.recv_family, p.protocol, 0);
+       if (fd < 0)
+               error(1, errno, "failed to create socket 1");
+
+       if (bind(fd, addr, sockaddr_size()))
+               error(1, errno, "failed to bind recv socket 1");
+
+       errno = 0;
+       if (!setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &bpf_fd,
+                       sizeof(bpf_fd)) || errno != EINVAL)
+               error(1, errno, "setsockopt should have returned EINVAL");
+
+       errno = 0;
+       if (!setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_CBPF, &cprog,
+                      sizeof(cprog)) || errno != EINVAL)
+               error(1, errno, "setsockopt should have returned EINVAL");
+
+       close(fd);
+       close(bpf_fd);
+       free(addr);
+}
+
+/* Attach one eBPF and one cBPF program to sockets that are never bound. */
+static void test_filter_without_bind(void)
+{
+       int ebpf_sock, cbpf_sock;
+
+       fprintf(stderr, "Testing filter add without bind...\n");
+       ebpf_sock = socket(AF_INET, SOCK_DGRAM, 0);
+       if (ebpf_sock < 0)
+               error(1, errno, "failed to create socket 1");
+       cbpf_sock = socket(AF_INET, SOCK_DGRAM, 0);
+       if (cbpf_sock < 0)
+               error(1, errno, "failed to create socket 2");
+
+       attach_ebpf(ebpf_sock, 10);
+       attach_cbpf(cbpf_sock, 10);
+       close(ebpf_sock);
+       close(cbpf_sock);
+}
+
+
+int main(void)
+{       /* each case has its own recv_port and a 20-port sender range */
+       fprintf(stderr, "---- IPv4 UDP ----\n");
+       test_reuseport_ebpf((struct test_params) {
+               .recv_family = AF_INET,
+               .send_family = AF_INET,
+               .protocol = SOCK_DGRAM,
+               .recv_socks = 10,
+               .recv_port = 8000,
+               .send_port_min = 9000});
+       test_reuseport_cbpf((struct test_params) {
+               .recv_family = AF_INET,
+               .send_family = AF_INET,
+               .protocol = SOCK_DGRAM,
+               .recv_socks = 10,
+               .recv_port = 8001,
+               .send_port_min = 9020});
+       test_extra_filter((struct test_params) {        /* no sender used */
+               .recv_family = AF_INET,
+               .protocol = SOCK_DGRAM,
+               .recv_port = 8002});
+       test_filter_no_reuseport((struct test_params) { /* no sender used */
+               .recv_family = AF_INET,
+               .protocol = SOCK_DGRAM,
+               .recv_port = 8008});
+
+       fprintf(stderr, "---- IPv6 UDP ----\n");
+       test_reuseport_ebpf((struct test_params) {
+               .recv_family = AF_INET6,
+               .send_family = AF_INET6,
+               .protocol = SOCK_DGRAM,
+               .recv_socks = 10,
+               .recv_port = 8003,
+               .send_port_min = 9040});
+       test_reuseport_cbpf((struct test_params) {
+               .recv_family = AF_INET6,
+               .send_family = AF_INET6,
+               .protocol = SOCK_DGRAM,
+               .recv_socks = 10,
+               .recv_port = 8004,
+               .send_port_min = 9060});
+       test_extra_filter((struct test_params) {
+               .recv_family = AF_INET6,
+               .protocol = SOCK_DGRAM,
+               .recv_port = 8005});
+       test_filter_no_reuseport((struct test_params) {
+               .recv_family = AF_INET6,
+               .protocol = SOCK_DGRAM,
+               .recv_port = 8009});
+
+       fprintf(stderr, "---- IPv6 UDP w/ mapped IPv4 ----\n");
+       test_reuseport_ebpf((struct test_params) {
+               .recv_family = AF_INET6,        /* receivers are IPv6 ... */
+               .send_family = AF_INET,         /* ... while the sender is IPv4 */
+               .protocol = SOCK_DGRAM,
+               .recv_socks = 10,
+               .recv_port = 8006,
+               .send_port_min = 9080});
+       test_reuseport_cbpf((struct test_params) {
+               .recv_family = AF_INET6,
+               .send_family = AF_INET,
+               .protocol = SOCK_DGRAM,
+               .recv_socks = 10,
+               .recv_port = 8007,
+               .send_port_min = 9100});
+
+
+       test_filter_without_bind();     /* takes no params; run once */
+
+       fprintf(stderr, "SUCCESS\n");
+       return 0;
+}
-- 
2.6.0.rc2.230.g3dd15c0

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to