Add a sample program that shows how a socksg program is used and attached to a socket filter. The kernel sample program deals with the struct scatterlist that is passed as the bpf context.
When run in server mode, the sample RDS program opens a PF_RDS socket and attaches an eBPF program to it; the eBPF program then uses the bpf_sg_next helper along with bpf tail calls to retrieve the packet data contained in struct scatterlist form. To ease testing, RDS client functionality is also added so that users can generate RDS packets. Server: [root@lab71 bpf]# ./rds_filter -s 192.168.3.71 -t tcp running server in a loop transport tcp server bound to address: 192.168.3.71 port 4000 server listening on 192.168.3.71 Client: [root@lab70 bpf]# ./rds_filter -s 192.168.3.71 -c 192.168.3.70 -t tcp transport tcp client bound to address: 192.168.3.70 port 25278 client sending 8192 byte message from 192.168.3.70 to 192.168.3.71 on port 25278 payload contains:30 31 32 33 34 35 36 37 38 39 ... Server output: 192.168.3.71 received a packet from 192.168.3.71 of len 8192 cmsg len 0, on port 25278 payload contains:30 31 32 33 34 35 36 37 38 39 ... server listening on 192.168.3.71 BPF program output: [root@lab71]# cat /sys/kernel/debug/tracing/trace_pipe <idle>-0 [007] ..s. 525.994894: 0: Print first 6 bytes from sg element <idle>-0 [007] ..s. 525.994897: 0: First sg element: <idle>-0 [007] ..s. 525.994899: 0: 30 31 32 <idle>-0 [007] ..s. 525.994900: 0: 33 34 35 <idle>-0 [007] ..s. 525.994901: 0: next sg element: <idle>-0 [007] ..s. 525.994902: 0: a8 a9 aa <idle>-0 [007] ..s. 525.994903: 0: ab ac ad <idle>-0 [007] ..s. 525.994904: 0: next sg element: <idle>-0 [007] ..s. 525.994905: 0: 50 51 52 <idle>-0 [007] ..s. 525.994905: 0: 53 54 55 <idle>-0 [007] ..s. 525.994906: 0: next sg element: <idle>-0 [007] ..s. 525.994907: 0: f8 f9 fa <idle>-0 [007] ..s. 525.994907: 0: fb fc fd <idle>-0 [007] ..s. 525.994908: 0: next sg element: <idle>-0 [007] ..s. 525.994909: 0: a0 a1 a2 <idle>-0 [007] ..s. 525.994909: 0: a3 a4 a5 <idle>-0 [007] ..s. 525.994910: 0: next sg element: <idle>-0 [007] ..s. 525.994911: 0: 48 49 4a <idle>-0 [007] ..s. 525.994911: 0: 4b 4c 4d <idle>-0 [007] ..s. 
525.994912: 0: no more sg element

Similarly, specifying '-t ib' will run this on an IB link.

Signed-off-by: Tushar Dave <tushar.n.d...@oracle.com>
Acked-by: Sowmini Varadhan <sowmini.varad...@oracle.com>
---
 samples/bpf/Makefile          |   3 +
 samples/bpf/rds_filter_kern.c |  88 ++++++++++
 samples/bpf/rds_filter_user.c | 377 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 468 insertions(+)
 create mode 100644 samples/bpf/rds_filter_kern.c
 create mode 100644 samples/bpf/rds_filter_user.c

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 1303af1..5de238b 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -52,6 +52,7 @@ hostprogs-y += xdp_adjust_tail
 hostprogs-y += xdpsock
 hostprogs-y += xdp_fwd
 hostprogs-y += task_fd_query
+hostprogs-y += rds_filter
 
 # Libbpf dependencies
 LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a
@@ -107,6 +108,7 @@ xdp_adjust_tail-objs := xdp_adjust_tail_user.o
 xdpsock-objs := bpf_load.o xdpsock_user.o
 xdp_fwd-objs := bpf_load.o xdp_fwd_user.o
 task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS)
+rds_filter-objs := bpf_load.o rds_filter_user.o
 
 # Tell kbuild to always build the programs
 always := $(hostprogs-y)
@@ -163,6 +165,7 @@ always += xdp_adjust_tail_kern.o
 always += xdpsock_kern.o
 always += xdp_fwd_kern.o
 always += task_fd_query_kern.o
+always += rds_filter_kern.o
 
 HOSTCFLAGS += -I$(objtree)/usr/include
 HOSTCFLAGS += -I$(srctree)/tools/lib/
diff --git a/samples/bpf/rds_filter_kern.c b/samples/bpf/rds_filter_kern.c
new file mode 100644
index 0000000..8fe3d3c
--- /dev/null
+++ b/samples/bpf/rds_filter_kern.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Sample socksg eBPF program: dumps the first bytes of each scatterlist
+ * element it is handed and uses a tail call to move to the next element.
+ */
+#include <linux/filter.h>
+#include <linux/ptrace.h>
+#include <linux/version.h>
+#include <uapi/linux/bpf.h>
+#include <linux/rds.h>
+#include "bpf_helpers.h"
+
+#define PROG(F) SEC("socksg/"__stringify(F)) int bpf_func_##F
+
+#define bpf_printk(fmt, ...) \
+({ \
+	char ____fmt[] = fmt; \
+	bpf_trace_printk(____fmt, sizeof(____fmt), \
+			 ##__VA_ARGS__); \
+})
+
+/* Program array that the tail call in sg_dispatcher() indexes. */
+struct bpf_map_def SEC("maps") jmp_table = {
+	.type = BPF_MAP_TYPE_PROG_ARRAY,
+	.key_size = sizeof(u32),
+	.value_size = sizeof(u32),
+	.max_entries = 2,
+};
+
+/* Section number and jmp_table index of the sg walker program */
+#define SG1 1
+
+/* Number of leading bytes dump_sg() prints per sg element */
+#define DUMP_BYTES 6
+
+/* Print the first DUMP_BYTES bytes of the current sg element. */
+static inline void dump_sg(struct sg_filter_md *sg)
+{
+	void *data = (void *)(long) sg->data;
+	void *data_end = (void *)(long) sg->data_end;
+	unsigned char *d;
+
+	/* Bound the check by what is read below: d[0] .. d[DUMP_BYTES - 1]. */
+	if (data + DUMP_BYTES > data_end)
+		return;
+
+	d = (unsigned char *)data;
+	bpf_printk("%x %x %x\n", d[0], d[1], d[2]);
+	bpf_printk("%x %x %x\n", d[3], d[4], d[5]);
+}
+
+/* Step to the next sg element and tail-call the walker on it. */
+static void sg_dispatcher(struct sg_filter_md *sg)
+{
+	int ret;
+
+	ret = bpf_sg_next(sg);
+	if (ret == -ENODATA) {
+		bpf_printk("no more sg element\n");
+		return;
+	}
+
+	/* We use same function to walk sg list */
+	bpf_tail_call(sg, &jmp_table, SG1);
+}
+
+/* walk sg list */
+PROG(SG1)(struct sg_filter_md *sg)
+{
+	bpf_printk("next sg element:\n");
+	dump_sg(sg);
+	sg_dispatcher(sg);
+	return 0;
+}
+
+/* Dump the first sg element, then hand over to the walker. */
+SEC("socksg/0")
+int main_prog(struct sg_filter_md *sg)
+{
+	bpf_printk("Print first 6 bytes from sg element\n");
+	bpf_printk("First sg element:\n");
+	dump_sg(sg);
+	sg_dispatcher(sg);
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/rds_filter_user.c b/samples/bpf/rds_filter_user.c
new file mode 100644
index 0000000..1165f1e
--- /dev/null
+++ b/samples/bpf/rds_filter_user.c
@@ -0,0 +1,377 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <arpa/inet.h>
+#include <assert.h>
+#include "bpf_load.h"
+#include <getopt.h>
+#include <errno.h>
+#include <netinet/in.h>
+#include <limits.h>
+#include <linux/sockios.h>
+#include <linux/rds.h>
+#include <linux/errqueue.h>
+#include <linux/bpf.h>
+#include <strings.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#define TESTPORT 4000
+#define BUFSIZE 8192
+
+/* RDS transport selected with -t; -1 until the option is parsed */
+int transport = -1;
+
+/* Map a transport name ("tcp" or "ib") to its RDS_TRANS_* value. */
+static int str2trans(const char *trans)
+{
+	if (strcmp(trans, "tcp") == 0)
+		return RDS_TRANS_TCP;
+	if (strcmp(trans, "ib") == 0)
+		return RDS_TRANS_IB;
+	return RDS_TRANS_NONE;
+}
+
+/* Map an RDS_TRANS_* value to a printable name. */
+static const char *trans2str(int trans)
+{
+	switch (trans) {
+	case RDS_TRANS_TCP:
+		return "tcp";
+	case RDS_TRANS_IB:
+		return "ib";
+	case RDS_TRANS_NONE:
+		return "none";
+	default:
+		return "unknown";
+	}
+}
+
+/* Return the transport of @sock, or a negative value if getsockopt() fails. */
+static int gettransport(int sock)
+{
+	socklen_t len;
+	int val;
+	int err;
+
+	/* val must be as large as the length advertised in len. */
+	len = sizeof(val);
+	err = getsockopt(sock, SOL_RDS, SO_RDS_TRANSPORT, &val, &len);
+	if (err < 0) {
+		fprintf(stderr, "%s: getsockopt %s\n",
+			__func__, strerror(errno));
+		return err;
+	}
+	return val;
+}
+
+/* Select @transport for @sock; returns the setsockopt() result. */
+static int settransport(int sock, int transport)
+{
+	int err;
+
+	err = setsockopt(sock, SOL_RDS, SO_RDS_TRANSPORT,
+			 (char *)&transport, sizeof(transport));
+	if (err < 0) {
+		fprintf(stderr, "could not set transport %s, %s\n",
+			trans2str(transport), strerror(errno));
+	}
+	return err;
+}
+
+/*
+ * Print the local address and port of @fd prefixed with @str and, when @ret
+ * is not NULL, copy the local address into it.
+ */
+static void print_sock_local_info(int fd, char *str, struct sockaddr_in *ret)
+{
+	socklen_t sin_size = sizeof(struct sockaddr_in);
+	struct sockaddr_in sin;
+	int err;
+
+	err = getsockname(fd, (struct sockaddr *)&sin, &sin_size);
+	if (err < 0) {
+		fprintf(stderr, "%s getsockname %s\n",
+			__func__, strerror(errno));
+		return;
+	}
+	printf("%s address: %s port %d\n",
+	       (str ? str : ""), inet_ntoa(sin.sin_addr), ntohs(sin.sin_port));
+
+	if (ret != NULL)
+		*ret = sin;
+}
+
+/* Print the first 10 bytes of @buf in hex. */
+static void print_payload(char *buf)
+{
+	int i;
+
+	printf("payload contains:");
+	/* Convert to unsigned char so bytes >= 0x80 are not sign-extended. */
+	for (i = 0; i < 10; i++)
+		printf("%x ", (unsigned char)buf[i]);
+	printf("...\n");
+}
+
+/*
+ * Bind an RDS socket to @address:@port, attach the eBPF program to it and
+ * print every message received until recvmsg() fails.
+ */
+static void server(char *address, in_port_t port)
+{
+	char local[INET_ADDRSTRLEN], peer[INET_ADDRSTRLEN];
+	struct sockaddr_in sin, din;
+	struct msghdr msg;
+	struct iovec iov;
+	ssize_t rc;
+	int sock;
+	char *buf;
+
+	buf = calloc(BUFSIZE, sizeof(char));
+	if (!buf) {
+		fprintf(stderr, "%s: calloc %s\n", __func__, strerror(errno));
+		return;
+	}
+
+	sock = socket(PF_RDS, SOCK_SEQPACKET, 0);
+	if (sock < 0) {
+		fprintf(stderr, "%s: socket %s\n", __func__, strerror(errno));
+		goto out_free;
+	}
+	if (settransport(sock, transport) < 0)
+		goto out_close;
+
+	printf("transport %s\n", trans2str(gettransport(sock)));
+
+	memset(&sin, 0, sizeof(sin));
+	sin.sin_family = AF_INET;
+	sin.sin_addr.s_addr = inet_addr(address);
+	sin.sin_port = htons(port);
+
+	if (bind(sock, (struct sockaddr *)&sin, sizeof(sin)) < 0) {
+		fprintf(stderr, "%s: bind %s\n", __func__, strerror(errno));
+		goto out_close;
+	}
+
+	/*
+	 * attach bpf prog; done outside assert() because an asserted call
+	 * is compiled out when NDEBUG is defined
+	 */
+	if (setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog_fd[1],
+		       sizeof(prog_fd[1])) < 0) {
+		fprintf(stderr, "%s: setsockopt SO_ATTACH_BPF %s\n",
+			__func__, strerror(errno));
+		goto out_close;
+	}
+
+	print_sock_local_info(sock, "server bound to", NULL);
+
+	/*
+	 * inet_ntoa() returns one shared static buffer, so two calls in a
+	 * single printf() print the same address; use private buffers.
+	 */
+	inet_ntop(AF_INET, &sin.sin_addr, local, sizeof(local));
+
+	while (1) {
+		memset(buf, 0, BUFSIZE);
+		iov.iov_base = buf;
+		iov.iov_len = BUFSIZE;
+
+		memset(&din, 0, sizeof(din));
+		memset(&msg, 0, sizeof(msg));
+		msg.msg_name = &din;
+		msg.msg_namelen = sizeof(din);
+		msg.msg_iov = &iov;
+		msg.msg_iovlen = 1;
+
+		printf("server listening on %s\n", local);
+
+		rc = recvmsg(sock, &msg, 0);
+		if (rc < 0) {
+			fprintf(stderr, "%s: recvmsg %s\n",
+				__func__, strerror(errno));
+			break;
+		}
+
+		inet_ntop(AF_INET, &din.sin_addr, peer, sizeof(peer));
+		/* report the byte count returned by recvmsg(), not BUFSIZE */
+		printf("%s received a packet from %s of len %zd cmsg len %zu, on port %d\n",
+		       local, peer, rc, (size_t)msg.msg_controllen,
+		       ntohs(din.sin_port));
+
+		print_payload(buf);
+	}
+out_close:
+	close(sock);
+out_free:
+	free(buf);
+}
+
+/* Fill @buf with BUFSIZE bytes counting up from 0x30, wrapping at a byte. */
+static void create_message(char *buf)
+{
+	unsigned int i;
+
+	for (i = 0; i < BUFSIZE; i++)
+		buf[i] = (char)(i + 0x30);
+}
+
+/*
+ * Point @msg at a newly allocated one-entry iovec covering @buf; the caller
+ * frees msg->msg_iov. Returns 0 on success, -1 if the allocation fails.
+ */
+static int build_rds_packet(struct msghdr *msg, char *buf)
+{
+	struct iovec *iov;
+
+	iov = calloc(1, sizeof(*iov));
+	if (!iov) {
+		fprintf(stderr, "%s: calloc %s\n", __func__, strerror(errno));
+		return -1;
+	}
+
+	iov[0].iov_base = buf;
+	iov[0].iov_len = BUFSIZE * sizeof(char);
+
+	msg->msg_iov = iov;
+	msg->msg_iovlen = 1;
+
+	return 0;
+}
+
+/*
+ * Bind an RDS socket to @localaddr and send one BUFSIZE-byte message to
+ * @remoteaddr:@server_port.
+ */
+static void client(char *localaddr, char *remoteaddr, in_port_t server_port)
+{
+	struct sockaddr_in sin, din;
+	struct msghdr msg;
+	int sock;
+	char *buf;
+
+	buf = calloc(BUFSIZE, sizeof(char));
+	if (!buf) {
+		fprintf(stderr, "%s: calloc %s\n", __func__, strerror(errno));
+		return;
+	}
+
+	create_message(buf);
+
+	sock = socket(PF_RDS, SOCK_SEQPACKET, 0);
+	if (sock < 0) {
+		fprintf(stderr, "%s: socket %s\n", __func__, strerror(errno));
+		goto out_free;
+	}
+
+	if (settransport(sock, transport) < 0)
+		goto out_close;
+
+	printf("transport %s\n", trans2str(gettransport(sock)));
+
+	memset(&sin, 0, sizeof(sin));
+	sin.sin_family = AF_INET;
+	sin.sin_addr.s_addr = inet_addr(localaddr);
+	sin.sin_port = 0;
+
+	if (bind(sock, (struct sockaddr *)&sin, sizeof(sin)) < 0) {
+		fprintf(stderr, "%s: bind %s\n", __func__, strerror(errno));
+		goto out_close;
+	}
+	print_sock_local_info(sock, "client bound to", &sin);
+
+	memset(&din, 0, sizeof(din));
+	din.sin_family = AF_INET;
+	din.sin_addr.s_addr = inet_addr(remoteaddr);
+	din.sin_port = htons(server_port);
+
+	memset(&msg, 0, sizeof(msg));
+	msg.msg_name = &din;
+	msg.msg_namelen = sizeof(din);
+
+	if (build_rds_packet(&msg, buf) < 0)
+		goto out_close;
+
+	printf("client sending %zu byte message from %s to %s on port %d\n",
+	       msg.msg_iov->iov_len, localaddr,
+	       remoteaddr, ntohs(sin.sin_port));
+
+	if (sendmsg(sock, &msg, 0) < 0)
+		fprintf(stderr, "%s: sendmsg %s\n", __func__, strerror(errno));
+
+	print_payload(buf);
+
+	free(msg.msg_iov);
+out_close:
+	close(sock);
+out_free:
+	free(buf);
+}
+
+/* Print the command line synopsis to stderr. */
+static void usage(char *progname)
+{
+	fprintf(stderr, "Usage %s [-s srvaddr] [-c clientaddr] [-t transport]"
+		"\n", progname);
+}
+
+/*
+ * With only -s, run the server; with both -s and -c, send one message from
+ * the client address to the server address.
+ */
+int main(int argc, char **argv)
+{
+	in_port_t server_port = TESTPORT;
+	char *serveraddr = NULL;
+	char *clientaddr = NULL;
+	char filename[256];
+	int opt;
+
+	while ((opt = getopt(argc, argv, "s:c:t:")) != -1) {
+		switch (opt) {
+		case 's':
+			serveraddr = optarg;
+			break;
+		case 'c':
+			clientaddr = optarg;
+			break;
+		case 't':
+			transport = str2trans(optarg);
+			if (transport == RDS_TRANS_NONE) {
+				fprintf(stderr,
+					"unknown transport %s\n", optarg);
+				usage(argv[0]);
+				return 1;
+			}
+			break;
+		default:
+			usage(argv[0]);
+			return 1;
+		}
+	}
+
+	/* Both modes need the server address; without it nothing would run. */
+	if (!serveraddr) {
+		usage(argv[0]);
+		return 1;
+	}
+
+	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+	if (load_bpf_file(filename)) {
+		fprintf(stderr, "Error: load_bpf_file %s", bpf_log_buf);
+		return 1;
+	}
+
+	if (clientaddr) {
+		client(clientaddr, serveraddr, server_port);
+	} else {
+		printf("running server in a loop\n");
+		server(serveraddr, server_port);
+	}
+
+	return 0;
+}
-- 
1.8.3.1