Added a selftest for tcpbpf (sock_ops) that checks that the appropriate
callbacks occurred and that it can access tcp_sock fields and that their
values are correct.

Run with command: ./test_tcpbpf_user

Signed-off-by: Lawrence Brakmo <bra...@fb.com>
---
 tools/include/uapi/linux/bpf.h                 |  70 +++++++++++++-
 tools/testing/selftests/bpf/Makefile           |   4 +-
 tools/testing/selftests/bpf/tcp_client.py      |  52 ++++++++++
 tools/testing/selftests/bpf/tcp_server.py      |  79 ++++++++++++++++
 tools/testing/selftests/bpf/test_tcpbpf_kern.c | 125 +++++++++++++++++++++++++
 tools/testing/selftests/bpf/test_tcpbpf_user.c | 113 ++++++++++++++++++++++
 6 files changed, 438 insertions(+), 5 deletions(-)
 create mode 100755 tools/testing/selftests/bpf/tcp_client.py
 create mode 100755 tools/testing/selftests/bpf/tcp_server.py
 create mode 100644 tools/testing/selftests/bpf/test_tcpbpf_kern.c
 create mode 100644 tools/testing/selftests/bpf/test_tcpbpf_user.c

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 4e8c60a..1fcd86f 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -945,8 +945,9 @@ struct bpf_map_info {
 struct bpf_sock_ops {
        __u32 op;
        union {
-               __u32 reply;
-               __u32 replylong[4];
+               __u32 args[4];          /* Optionally passed to bpf program */
+               __u32 reply;            /* Returned by bpf program          */
+               __u32 replylong[4];     /* Optionally returned by bpf prog  */
        };
        __u32 family;
        __u32 remote_ip4;       /* Stored in network byte order */
@@ -955,6 +956,33 @@ struct bpf_sock_ops {
        __u32 local_ip6[4];     /* Stored in network byte order */
        __u32 remote_port;      /* Stored in network byte order */
        __u32 local_port;       /* stored in host byte order */
+       __u32 is_fullsock;      /* Some TCP fields are only valid if
+                                * there is a full socket. If not, the
+                                * fields read as zero.
+                                */
+       __u32 snd_cwnd;
+       __u32 srtt_us;          /* Averaged RTT << 3 in usecs */
+       __u32 bpf_sock_ops_flags; /* flags defined in uapi/linux/tcp.h */
+       __u32 state;
+       __u32 rtt_min;
+       __u32 snd_ssthresh;
+       __u32 rcv_nxt;
+       __u32 snd_nxt;
+       __u32 snd_una;
+       __u32 mss_cache;
+       __u32 ecn_flags;
+       __u32 rate_delivered;
+       __u32 rate_interval_us;
+       __u32 packets_out;
+       __u32 retrans_out;
+       __u32 total_retrans;
+       __u32 segs_in;
+       __u32 data_segs_in;
+       __u32 segs_out;
+       __u32 data_segs_out;
+       __u64 bytes_received;
+       __u64 bytes_acked;
+       __u32 sk_txhash;
 };
 
 /* List of known BPF sock_ops operators.
@@ -990,6 +1018,41 @@ enum {
                                         * a congestion threshold. RTTs above
                                         * this indicate congestion
                                         */
+       BPF_SOCK_OPS_RTO_CB,            /* Called when an RTO has triggered.
+                                        * Arg1: value of icsk_retransmits
+                                        * Arg2: value of icsk_rto
+                                        * Arg3: whether RTO has expired
+                                        */
+       BPF_SOCK_OPS_RETRANS_CB,        /* Called when skb is retransmitted.
+                                        * Arg1: sequence number of 1st byte
+                                        * Arg2: # segments
+                                        */
+       BPF_SOCK_OPS_STATE_CB,          /* Called when TCP changes state.
+                                        * Arg1: old_state
+                                        * Arg2: new_state
+                                        */
+};
+
+/* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
+ * changes between the TCP and BPF versions. Ideally this should never happen.
+ * If it does, we need to add code to convert them before calling
+ * the BPF sock_ops function.
+ */
+enum {
+       BPF_TCP_ESTABLISHED = 1,
+       BPF_TCP_SYN_SENT,
+       BPF_TCP_SYN_RECV,
+       BPF_TCP_FIN_WAIT1,
+       BPF_TCP_FIN_WAIT2,
+       BPF_TCP_TIME_WAIT,
+       BPF_TCP_CLOSE,
+       BPF_TCP_CLOSE_WAIT,
+       BPF_TCP_LAST_ACK,
+       BPF_TCP_LISTEN,
+       BPF_TCP_CLOSING,        /* Now a valid state */
+       BPF_TCP_NEW_SYN_RECV,
+
+       BPF_TCP_MAX_STATES      /* Leave at the end! */
 };
 
 #define TCP_BPF_IW             1001    /* Set TCP initial congestion window */
@@ -1009,7 +1072,8 @@ struct bpf_perf_event_value {
 #define BPF_DEVCG_DEV_CHAR     (1ULL << 1)
 
 struct bpf_cgroup_dev_ctx {
-       __u32 access_type; /* (access << 16) | type */
+       /* access_type encoded as (BPF_DEVCG_ACC_* << 16) | BPF_DEVCG_DEV_* */
+       __u32 access_type;
        __u32 major;
        __u32 minor;
 };
diff --git a/tools/testing/selftests/bpf/Makefile 
b/tools/testing/selftests/bpf/Makefile
index 1304753..8e032a2 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -14,12 +14,12 @@ CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) 
$(GENFLAGS) -I../../../i
 LDLIBS += -lcap -lelf -lrt
 
 TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map 
test_progs \
-       test_align test_verifier_log test_dev_cgroup
+       test_align test_verifier_log test_dev_cgroup test_tcpbpf_user
 
 TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o 
test_obj_id.o \
        test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o 
sockmap_parse_prog.o     \
        sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \
-       test_l4lb_noinline.o test_xdp_noinline.o
+       test_l4lb_noinline.o test_xdp_noinline.o test_tcpbpf_kern.o
 
 TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh \
        test_offload.py
diff --git a/tools/testing/selftests/bpf/tcp_client.py 
b/tools/testing/selftests/bpf/tcp_client.py
new file mode 100755
index 0000000..ac2ce32
--- /dev/null
+++ b/tools/testing/selftests/bpf/tcp_client.py
@@ -0,0 +1,52 @@
+#!/usr/local/bin/python
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+
+import sys, os, os.path, getopt
+import socket, time
+import subprocess
+import select
+
+def read(sock, n):
+    buf = ''
+    while len(buf) < n:
+        rem = n - len(buf)
+        try: s = sock.recv(rem)
+        except (socket.error), e: return ''
+        buf += s
+    return buf
+
+def send(sock, s):
+    total = len(s)
+    count = 0
+    while count < total:
+        try: n = sock.send(s)
+        except (socket.error), e: n = 0
+        if n == 0:
+            return count;
+        count += n
+    return count
+
+
+serverPort = int(sys.argv[1])
+HostName = socket.gethostname()
+
+time.sleep(1)
+
+# create active socket
+sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
+try:
+    sock.connect((HostName, serverPort))
+except socket.error as e:
+    sys.exit(1)
+
+buf = ''
+n = 0
+while n < 1000:
+    buf += '+'
+    n += 1
+
+n = send(sock, buf)
+n = read(sock, 500)
+sys.exit(0)
diff --git a/tools/testing/selftests/bpf/tcp_server.py 
b/tools/testing/selftests/bpf/tcp_server.py
new file mode 100755
index 0000000..9e5db0d
--- /dev/null
+++ b/tools/testing/selftests/bpf/tcp_server.py
@@ -0,0 +1,79 @@
+#!/usr/local/bin/python
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+
+import sys, os, os.path, getopt
+import socket, time
+import subprocess
+import select
+
+def read(sock, n):
+    buf = ''
+    while len(buf) < n:
+        rem = n - len(buf)
+        try: s = sock.recv(rem)
+        except (socket.error), e: return ''
+        buf += s
+    return buf
+
+def send(sock, s):
+    total = len(s)
+    count = 0
+    while count < total:
+        try: n = sock.send(s)
+        except (socket.error), e: n = 0
+        if n == 0:
+            return count;
+        count += n
+    return count
+
+
+SERVER_PORT = 12877
+MAX_PORTS = 2
+
+serverPort = SERVER_PORT
+serverSocket = None
+
+HostName = socket.gethostname()
+
+# create passive socket
+serverSocket = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
+host = socket.gethostname()
+
+while serverPort < SERVER_PORT + 5:
+       try: serverSocket.bind((host, serverPort))
+       except socket.error as msg:
+            serverPort += 1
+            continue
+       break
+
+cmdStr = ("./tcp_client.py %d &") % (serverPort)
+os.system(cmdStr)
+
+buf = ''
+n = 0
+while n < 500:
+    buf += '.'
+    n += 1
+
+serverSocket.listen(MAX_PORTS)
+readList = [serverSocket]
+
+while True:
+    readyRead, readyWrite, inError = \
+        select.select(readList, [], [], 10)
+
+    if len(readyRead) > 0:
+        waitCount = 0
+        for sock in readyRead:
+            if sock == serverSocket:
+                (clientSocket, address) = serverSocket.accept()
+                address = str(address[0])
+                readList.append(clientSocket)
+            else:
+                s = read(sock, 1000)
+                n = send(sock, buf)
+                sock.close()
+                time.sleep(1)
+                sys.exit(0)
diff --git a/tools/testing/selftests/bpf/test_tcpbpf_kern.c 
b/tools/testing/selftests/bpf/test_tcpbpf_kern.c
new file mode 100644
index 0000000..0fa7429
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tcpbpf_kern.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stddef.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/in6.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <netinet/in.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+struct globals {
+       __u32 event_map;        /* bit N set by update_event_map(N) */
+       __u32 total_retrans;    /* this and the fields below are copied */
+       __u32 data_segs_in;     /* from skops by bpf_testcb() when a */
+       __u32 data_segs_out;    /* STATE_CB has args[1] == BPF_TCP_CLOSE */
+       __u64 bytes_received;
+       __u64 bytes_acked;
+};
+
+struct bpf_map_def SEC("maps") global_map = {
+       .type = BPF_MAP_TYPE_ARRAY,
+       .key_size = sizeof(__u32),
+       .value_size = sizeof(struct globals),
+       .max_entries = 2,       /* this program only ever uses key 0 */
+};
+
+/* Record that sock_ops callback 'event' ran by setting bit 'event' in
+ * event_map of the globals entry stored under key 0 of global_map.
+ */
+static inline void update_event_map(int event)
+{
+       struct globals g = {0, 0, 0, 0, 0, 0};
+       struct globals *gp;
+       __u32 key = 0;
+
+       /* Start from the stored entry if the lookup finds one, else from
+        * zeroes, so that the other fields of the entry are preserved.
+        */
+       gp = bpf_map_lookup_elem(&global_map, &key);
+       if (gp != NULL)
+               g = *gp;
+
+       g.event_map |= (1U << event);
+       bpf_map_update_elem(&global_map, &key, &g, BPF_ANY);
+}
+
+int _version SEC("version") = 1;
+
+SEC("sockops")
+int bpf_testcb(struct bpf_sock_ops *skops)
+{
+       int rv = -1;            /* value stored in skops->reply */
+       int op;
+       int init_seq = 0;       /* assigned below but never read */
+       int ret = 0;            /* assigned below but never read */
+       int v = 0;
+
+       /* For testing purposes, only execute rest of BPF program
+        * if remote port number is in the range 12877..12886
+        * I.e. the active side of the connection
+        */
+       if ((bpf_ntohl(skops->remote_port) < 12877 ||
+            bpf_ntohl(skops->remote_port) >= 12887)) {
+               skops->reply = -1;
+               return 1;
+       }
+
+       op = (int) skops->op;
+
+       /* No further filtering: every callback that gets here is recorded
+        * in event_map and then handled by op. rv is never changed from
+        * -1, so skops->reply is -1 on every path.
+        */
+       if (1) {
+               update_event_map(op);
+
+               switch (op) {
+               case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
+                       skops->bpf_sock_ops_flags = 0xfff;
+                       init_seq = skops->snd_nxt;
+                       break;
+               case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
+                       init_seq = skops->snd_nxt;
+                       skops->bpf_sock_ops_flags = 0xfff;
+                       skops->sk_txhash = 0x12345f;
+                       v = 0xff;
+                       ret = bpf_setsockopt(skops, SOL_IPV6, IPV6_TCLASS, &v,
+                                            sizeof(v));
+                       break;
+               case BPF_SOCK_OPS_RTO_CB:       /* only recorded in event_map */
+                       break;
+               case BPF_SOCK_OPS_RETRANS_CB:   /* only recorded in event_map */
+                       break;
+               case BPF_SOCK_OPS_STATE_CB:
+                       if (skops->args[1] == BPF_TCP_CLOSE) {
+                               __u32 key = 0;
+                               struct globals g, *gp;
+
+                               gp = bpf_map_lookup_elem(&global_map, &key);
+                               if (!gp)
+                                       break;
+                               g = *gp;        /* keep event_map as stored */
+                               g.total_retrans = skops->total_retrans;
+                               g.data_segs_in = skops->data_segs_in;
+                               g.data_segs_out = skops->data_segs_out;
+                               g.bytes_received = skops->bytes_received;
+                               g.bytes_acked = skops->bytes_acked;
+                               bpf_map_update_elem(&global_map, &key, &g,
+                                                   BPF_ANY);
+                       }
+                       break;
+               default:
+                       rv = -1;
+               }
+       } else {
+               rv = -1;
+       }
+       skops->reply = rv;
+       return 1;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_tcpbpf_user.c 
b/tools/testing/selftests/bpf/test_tcpbpf_user.c
new file mode 100644
index 0000000..38665ea
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tcpbpf_user.c
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <errno.h>
+#include <signal.h>
+#include <string.h>
+#include <assert.h>
+#include <linux/perf_event.h>
+#include <linux/ptrace.h>
+#include <linux/bpf.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include "bpf_util.h"
+#include <linux/perf_event.h>
+
+struct globals {                /* value read from key 0 of global_map */
+       __u32 event_map;        /* main() expects 0x45e */
+       __u32 total_retrans;    /* not checked by main() */
+       __u32 data_segs_in;     /* main() expects 1 */
+       __u32 data_segs_out;    /* main() expects 1 */
+       __u64 bytes_received;   /* main() expects 501 */
+       __u64 bytes_acked;      /* main() expects 1002 */
+};
+
+/* Look up map 'name' in 'obj'; returns its fd, or -1 after a FAIL message. */
+static int bpf_find_map(const char *test, struct bpf_object *obj,
+                       const char *name)
+{
+       struct bpf_map *m = bpf_object__find_map_by_name(obj, name);
+
+       if (m)
+               return bpf_map__fd(m);
+
+       printf("%s:FAIL:map '%s' not found\n", test, name);
+       return -1;
+}
+/* Run CMD via system(); a non-zero result is reported but not fatal. */
+#define SYSTEM(CMD)                                            \
+       do {                                                    \
+               if (system(CMD)) {                              \
+                       printf("system(%s) FAILS!\n", CMD);     \
+               }                                               \
+       } while (0)
+
+/* tcpbpf selftest driver; returns 0 on success, EXIT_FAILURE on failure. */
+int main(int argc, char **argv)
+{
+       const char *file = "test_tcpbpf_kern.o";
+       const char *dir = "/tmp/cgroupv2/foo";
+       struct globals g = {0, 0, 0, 0, 0, 0};
+       int cg_fd, prog_fd, map_fd, rv;
+       int error = EXIT_FAILURE;
+       struct bpf_object *obj;
+       struct stat buffer;
+       char cmd[100];
+       __u32 key = 0;
+
+       if (stat(dir, &buffer) != 0) {  /* dir missing: create the cgroup */
+               SYSTEM("mkdir -p /tmp/cgroupv2");
+               SYSTEM("mount -t cgroup2 none /tmp/cgroupv2");
+               SYSTEM("mkdir -p /tmp/cgroupv2/foo");
+       }
+       snprintf(cmd, sizeof(cmd), "echo %d >> /tmp/cgroupv2/foo/cgroup.procs",
+                (int) getpid());
+       SYSTEM(cmd);            /* move this process into the test cgroup */
+
+       cg_fd = open(dir, O_DIRECTORY | O_RDONLY);
+       if (cg_fd < 0) {
+               printf("FAILED: open(%s): %s\n", dir, strerror(errno));
+               return error;
+       }
+       if (bpf_prog_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) {
+               printf("FAILED: bpf_prog_load failed for: %s\n", file);
+               goto err;
+       }
+
+       rv = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_SOCK_OPS, 0);
+       if (rv) {
+               printf("FAILED: bpf_prog_attach: %d (%s)\n",
+                      rv, strerror(errno));
+               goto err;
+       }
+
+       SYSTEM("./tcp_server.py");      /* runs the client/server exchange */
+
+       map_fd = bpf_find_map(__func__, obj, "global_map");
+       if (map_fd < 0)
+               goto err;
+
+       rv = bpf_map_lookup_elem(map_fd, &key, &g);
+       if (rv != 0) {
+               printf("FAILED: bpf_map_lookup_elem returns %d\n", rv);
+               goto err;
+       }
+
+       if (g.bytes_received != 501 || g.bytes_acked != 1002 ||
+           g.data_segs_in != 1 || g.data_segs_out != 1 ||
+           g.event_map != 0x45e) {
+               printf("FAILED: Wrong stats\n");
+               goto err;
+       }
+       printf("PASSED!\n");
+       error = 0;
+err:
+       bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS);
+       return error;
+}
-- 
2.9.5

Reply via email to