Add a sample program that shows how a socksg program is used and attached
to a socket as a filter. The kernel sample program deals with the struct
scatterlist that is passed as the bpf context.

When run in server mode, the sample RDS program opens a PF_RDS socket and
attaches an eBPF program to it. The eBPF program then uses the bpf_sg_next
helper along with bpf tail calls to retrieve the packet data, which is
held in struct scatterlist form.

To ease testing, RDS client functionality is also added so that users
can generate RDS packets.

Server:
[root@lab71 bpf]# ./rds_filter -s 192.168.3.71 -t tcp
running server in a loop
transport tcp
server bound to address: 192.168.3.71 port 4000
server listening on 192.168.3.71

Client:
[root@lab70 bpf]# ./rds_filter -s 192.168.3.71 -c 192.168.3.70 -t tcp
transport tcp
client bound to address: 192.168.3.70 port 25278
client sending 8192 byte message  from 192.168.3.70 to 192.168.3.71 on
port 25278
payload contains:30 31 32 33 34 35 36 37 38 39 ...

Server output:
192.168.3.71 received a packet from 192.168.3.71 of len 8192 cmsg len 0,
on port 25278
payload contains:30 31 32 33 34 35 36 37 38 39 ...
server listening on 192.168.3.71

BPF program output:
[root@lab71]# cat /sys/kernel/debug/tracing/trace_pipe
          <idle>-0     [007] ..s.   525.994894: 0: Print first 6 bytes from sg element
          <idle>-0     [007] ..s.   525.994897: 0: First sg element:
          <idle>-0     [007] ..s.   525.994899: 0: 30 31 32
          <idle>-0     [007] ..s.   525.994900: 0: 33 34 35
          <idle>-0     [007] ..s.   525.994901: 0: next sg element:
          <idle>-0     [007] ..s.   525.994902: 0: a8 a9 aa
          <idle>-0     [007] ..s.   525.994903: 0: ab ac ad
          <idle>-0     [007] ..s.   525.994904: 0: next sg element:
          <idle>-0     [007] ..s.   525.994905: 0: 50 51 52
          <idle>-0     [007] ..s.   525.994905: 0: 53 54 55
          <idle>-0     [007] ..s.   525.994906: 0: next sg element:
          <idle>-0     [007] ..s.   525.994907: 0: f8 f9 fa
          <idle>-0     [007] ..s.   525.994907: 0: fb fc fd
          <idle>-0     [007] ..s.   525.994908: 0: next sg element:
          <idle>-0     [007] ..s.   525.994909: 0: a0 a1 a2
          <idle>-0     [007] ..s.   525.994909: 0: a3 a4 a5
          <idle>-0     [007] ..s.   525.994910: 0: next sg element:
          <idle>-0     [007] ..s.   525.994911: 0: 48 49 4a
          <idle>-0     [007] ..s.   525.994911: 0: 4b 4c 4d
          <idle>-0     [007] ..s.   525.994912: 0: no more sg element

Similarly, specifying '-t ib' will run this on an IB link.

Signed-off-by: Tushar Dave <tushar.n.d...@oracle.com>
Acked-by: Sowmini Varadhan <sowmini.varad...@oracle.com>
---
 samples/bpf/Makefile          |   3 +
 samples/bpf/rds_filter_kern.c |  78 ++++++++++
 samples/bpf/rds_filter_user.c | 339 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 420 insertions(+)
 create mode 100644 samples/bpf/rds_filter_kern.c
 create mode 100644 samples/bpf/rds_filter_user.c

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 1303af1..5de238b 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -52,6 +52,7 @@ hostprogs-y += xdp_adjust_tail
 hostprogs-y += xdpsock
 hostprogs-y += xdp_fwd
 hostprogs-y += task_fd_query
+hostprogs-y += rds_filter
 
 # Libbpf dependencies
 LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a
@@ -107,6 +108,7 @@ xdp_adjust_tail-objs := xdp_adjust_tail_user.o
 xdpsock-objs := bpf_load.o xdpsock_user.o
 xdp_fwd-objs := bpf_load.o xdp_fwd_user.o
 task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS)
+rds_filter-objs := bpf_load.o rds_filter_user.o
 
 # Tell kbuild to always build the programs
 always := $(hostprogs-y)
@@ -163,6 +165,7 @@ always += xdp_adjust_tail_kern.o
 always += xdpsock_kern.o
 always += xdp_fwd_kern.o
 always += task_fd_query_kern.o
+always += rds_filter_kern.o
 
 HOSTCFLAGS += -I$(objtree)/usr/include
 HOSTCFLAGS += -I$(srctree)/tools/lib/
diff --git a/samples/bpf/rds_filter_kern.c b/samples/bpf/rds_filter_kern.c
new file mode 100644
index 0000000..8fe3d3c
--- /dev/null
+++ b/samples/bpf/rds_filter_kern.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/filter.h>
+#include <linux/ptrace.h>
+#include <linux/version.h>
+#include <uapi/linux/bpf.h>
+#include <linux/rds.h>
+#include "bpf_helpers.h"
+
+#define PROG(F) SEC("socksg/"__stringify(F)) int bpf_func_##F
+/* printf-style tracing: fmt is copied into a local array for the helper. */
+#define bpf_printk(fmt, ...)                           \
+({                                                     \
+       char ____fmt[] = fmt;                           \
+       bpf_trace_printk(____fmt, sizeof(____fmt),      \
+                       ##__VA_ARGS__);                 \
+})
+/* Program array that sg_dispatcher() passes to bpf_tail_call(). */
+struct bpf_map_def SEC("maps") jmp_table = {
+       .type = BPF_MAP_TYPE_PROG_ARRAY,
+       .key_size = sizeof(u32),
+       .value_size = sizeof(u32),
+       .max_entries = 2,
+};
+/* Section index for PROG(SG1); presumably also its jmp_table slot - confirm. */
+#define SG1 1
+/* Trace the first six bytes found between sg->data and sg->data_end. */
+static inline void dump_sg(struct sg_filter_md *sg)
+{
+       void *data = (void *)(long) sg->data;
+       void *data_end = (void *)(long) sg->data_end;
+       unsigned char *d;
+       /* NOTE(review): demands 8 readable bytes although only 6 are used. */
+       if (data + 8 > data_end)
+               return;
+       /* Two trace lines of three bytes each. */
+       d = (unsigned char *)data;
+       bpf_printk("%x %x %x\n", d[0], d[1], d[2]);
+       bpf_printk("%x %x %x\n", d[3], d[4], d[5]);
+
+       return;
+
+}
+/* Call bpf_sg_next(); stop on -ENODATA, else tail-call jmp_table slot 1. */
+static void sg_dispatcher(struct sg_filter_md *sg)
+{
+       int ret;
+
+       ret = bpf_sg_next(sg);
+       if (ret == -ENODATA) {
+               bpf_printk("no more sg element\n");
+               return;
+       }
+       /* NOTE(review): other return values still reach the tail call. */
+       /* We use same function to walk sg list */
+       bpf_tail_call(sg, &jmp_table, 1);
+}
+
+/* walk sg list: dump the current element, then hand over to sg_dispatcher() */
+PROG(SG1)(struct sg_filter_md *sg)
+{
+       bpf_printk("next sg element:\n");
+       dump_sg(sg);
+       sg_dispatcher(sg);
+       return 0;
+}
+/* "socksg/0" program: dump the first sg element, then start the walk. */
+SEC("socksg/0")
+int main_prog(struct sg_filter_md *sg)
+{
+       bpf_printk("Print first 6 bytes from sg element\n");
+       bpf_printk("First sg element:\n");
+       dump_sg(sg);
+       sg_dispatcher(sg);
+       return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/rds_filter_user.c b/samples/bpf/rds_filter_user.c
new file mode 100644
index 0000000..1165f1e
--- /dev/null
+++ b/samples/bpf/rds_filter_user.c
@@ -0,0 +1,339 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <arpa/inet.h>
+#include <assert.h>
+#include "bpf_load.h"
+#include <getopt.h>
+#include <errno.h>
+#include <netinet/in.h>
+#include <limits.h>
+#include <linux/sockios.h>
+#include <linux/rds.h>
+#include <linux/errqueue.h>
+#include <linux/bpf.h>
+#include <strings.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#define TESTPORT       4000
+#define BUFSIZE                8192
+
+int transport = -1;
+/* Map "tcp"/"ib" to the matching RDS_TRANS_* id; RDS_TRANS_NONE otherwise. */
+static int str2trans(const char *trans)
+{
+       int is_ib = !strcmp(trans, "ib");
+
+       if (!strcmp(trans, "tcp"))
+               return RDS_TRANS_TCP;
+       return is_ib ? RDS_TRANS_IB : RDS_TRANS_NONE;
+}
+/* Return a printable name for an RDS_TRANS_* id ("unknown" if unmatched). */
+static const char *trans2str(int trans)
+{
+       const char *name = "unknown";
+
+       if (trans == RDS_TRANS_TCP)
+               name = "tcp";
+       else if (trans == RDS_TRANS_IB)
+               name = "ib";
+       else if (trans == RDS_TRANS_NONE)
+               name = "none";
+
+       return name;
+}
+/* Read SO_RDS_TRANSPORT from sock; returns it, or getsockopt()'s error. */
+static int gettransport(int sock)
+{
+       int err;
+       int val;        /* option is an int; a char here would be overrun */
+       socklen_t len = sizeof(val);
+
+       err = getsockopt(sock, SOL_RDS, SO_RDS_TRANSPORT,
+                        &val, &len);
+       if (err < 0) {
+               fprintf(stderr, "%s: getsockopt %s\n",
+                       __func__, strerror(errno));
+               return err;
+       }
+       return val;
+}
+/* Set SO_RDS_TRANSPORT on sock; logs a failure and returns setsockopt()'s rc. */
+static int settransport(int sock, int transport)
+{
+       int rc = setsockopt(sock, SOL_RDS, SO_RDS_TRANSPORT,
+                           &transport, sizeof(transport));
+
+       if (rc >= 0)
+               return rc;
+
+       fprintf(stderr, "could not set transport %s, %s\n",
+               trans2str(transport), strerror(errno));
+       return rc;
+}
+/* Print fd's local address/port prefixed by str; store it in *ret if set. */
+static void print_sock_local_info(int fd, char *str, struct sockaddr_in *ret)
+{
+       struct sockaddr_in local;
+       socklen_t local_len = sizeof(local);
+       const char *label = str ? str : "";
+
+       if (getsockname(fd, (struct sockaddr *)&local, &local_len) < 0) {
+               fprintf(stderr, "%s getsockname %s\n",
+                       __func__, strerror(errno));
+               return;
+       }
+
+       printf("%s address: %s port %d\n", label,
+               inet_ntoa(local.sin_addr), ntohs(local.sin_port));
+
+       if (ret)
+               *ret = local;
+}
+/* Print the first 10 bytes of buf as hex; buf must hold at least 10 bytes. */
+static void print_payload(char *buf)
+{
+       int i;
+
+       printf("payload contains:");
+       for (i = 0; i < 10; i++)
+               printf("%x ", (unsigned char)buf[i]); /* no sign extension */
+       printf("...\n");
+}
+/* RDS server: bind, attach the BPF prog, print every message received. */
+static void server(char *address, in_port_t port)
+{
+       struct sockaddr_in sin, din;
+       char local[INET_ADDRSTRLEN];
+       struct msghdr msg;
+       struct iovec iov;
+       int rc, sock;
+       char *buf;
+
+       buf = calloc(BUFSIZE, sizeof(char));
+       if (!buf) {
+               fprintf(stderr, "%s: calloc %s\n", __func__, strerror(errno));
+               return;
+       }
+
+       sock = socket(PF_RDS, SOCK_SEQPACKET, 0);
+       if (sock < 0) {
+               fprintf(stderr, "%s: socket %s\n", __func__, strerror(errno));
+               goto out;
+       }
+       if (settransport(sock, transport) < 0)
+               goto out_close;
+
+       printf("transport %s\n", trans2str(gettransport(sock)));
+
+       memset(&sin, 0, sizeof(sin));
+       sin.sin_family = AF_INET;
+       sin.sin_addr.s_addr = inet_addr(address);
+       sin.sin_port = htons(port);
+       /* inet_ntoa() reuses one static buffer, so keep a private copy. */
+       snprintf(local, sizeof(local), "%s", inet_ntoa(sin.sin_addr));
+
+       rc = bind(sock, (struct sockaddr *)&sin, sizeof(sin));
+       if (rc < 0) {
+               fprintf(stderr, "%s: bind %s\n", __func__, strerror(errno));
+               goto out_close;
+       }
+
+       /* attach bpf prog; kept out of assert() so NDEBUG cannot drop it */
+       rc = setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog_fd[1],
+                       sizeof(prog_fd[1]));
+       if (rc < 0) {
+               fprintf(stderr, "%s: SO_ATTACH_BPF %s\n",
+                       __func__, strerror(errno));
+               goto out_close;
+       }
+
+       print_sock_local_info(sock, "server bound to", NULL);
+
+       while (1) {
+               memset(buf, 0, BUFSIZE);
+               iov.iov_base = buf;
+               iov.iov_len = BUFSIZE;
+
+               memset(&msg, 0, sizeof(msg));
+               msg.msg_name = &din;
+               msg.msg_namelen = sizeof(din);
+               msg.msg_iov = &iov;
+               msg.msg_iovlen = 1;
+
+               printf("server listening on %s\n", local);
+
+               rc = recvmsg(sock, &msg, 0);
+               if (rc < 0) {
+                       fprintf(stderr, "%s: recvmsg %s\n",
+                               __func__, strerror(errno));
+                       break;
+               }
+
+               /* rc, not iov_len, is the number of bytes actually received */
+               printf("%s received a packet from %s of len %d cmsg len %d, "
+                       "on port %d\n", local, inet_ntoa(din.sin_addr), rc,
+                       (int) msg.msg_controllen, ntohs(din.sin_port));
+               print_payload(buf);
+       }
+out_close:
+       close(sock);
+out:
+       free(buf);
+}
+/* Fill the BUFSIZE-byte buf with an incrementing pattern that starts at 0x30. */
+static void create_message(char *buf)
+{
+       char *end = buf + BUFSIZE;
+       unsigned int val = 0x30;
+
+       while (buf < end)
+               *buf++ = val++;
+}
+/* Point msg at a newly allocated single iovec over buf; -1 if calloc fails. */
+static int build_rds_packet(struct msghdr *msg, char *buf)
+{
+       struct iovec *vec = calloc(1, sizeof(struct iovec));
+
+       if (vec == NULL) {
+               fprintf(stderr, "%s: calloc %s\n", __func__, strerror(errno));
+               return -1;
+       }
+
+       /* One BUFSIZE-byte segment; the caller frees msg->msg_iov. */
+       vec->iov_base = buf;
+       vec->iov_len = BUFSIZE * sizeof(char);
+
+       msg->msg_iov = vec;
+       msg->msg_iovlen = 1;
+
+       return 0;
+}
+/* RDS client: bind to localaddr and send one BUFSIZE message to remoteaddr. */
+static void client(char *localaddr, char *remoteaddr, in_port_t server_port)
+{
+       struct sockaddr_in sin, din;
+       struct msghdr msg;
+       int rc, sock;
+       char *buf;
+
+       buf = calloc(BUFSIZE, sizeof(char));
+       if (!buf) {
+               fprintf(stderr, "%s: calloc %s\n", __func__, strerror(errno));
+               return;
+       }
+
+       create_message(buf);
+
+       sock = socket(PF_RDS, SOCK_SEQPACKET, 0);
+       if (sock < 0) {
+               fprintf(stderr, "%s: socket %s\n", __func__, strerror(errno));
+               goto out;
+       }
+
+       if (settransport(sock, transport) < 0)
+               goto out_close;
+
+       printf("transport %s\n", trans2str(gettransport(sock)));
+
+       /* sin_port 0: no fixed local port is requested */
+       memset(&sin, 0, sizeof(sin));
+       sin.sin_family = AF_INET;
+       sin.sin_addr.s_addr = inet_addr(localaddr);
+       sin.sin_port = 0;
+
+       rc = bind(sock, (struct sockaddr *)&sin, sizeof(sin));
+       if (rc < 0) {
+               fprintf(stderr, "%s: bind %s\n", __func__, strerror(errno));
+               goto out_close;
+       }
+       /* the getsockname() result is copied back into sin for the printf */
+       print_sock_local_info(sock, "client bound to",  &sin);
+
+       memset(&msg, 0, sizeof(msg));
+       msg.msg_name = &din;
+       msg.msg_namelen = sizeof(din);
+
+       memset(&din, 0, sizeof(din));
+       din.sin_family = AF_INET;
+       din.sin_addr.s_addr = inet_addr(remoteaddr);
+       din.sin_port = htons(server_port);
+
+       rc = build_rds_packet(&msg, buf);
+       if (rc < 0)
+               goto out_close;
+
+       printf("client sending %d byte message from %s to %s on port %d\n",
+               (int) msg.msg_iov->iov_len, localaddr,
+               remoteaddr, ntohs(sin.sin_port));
+
+       rc = sendmsg(sock, &msg, 0);
+       if (rc < 0)
+               fprintf(stderr, "%s: sendmsg %s\n", __func__, strerror(errno));
+
+       print_payload(buf);
+
+       /* msg_control is never set here; only the iovec was allocated */
+       free(msg.msg_iov);
+out_close:
+       close(sock);
+out:
+       free(buf);
+}
+/* Print the command-line synopsis for progname to stderr. */
+static void usage(char *progname)
+{
+       fprintf(stderr, "Usage %s [-s srvaddr] [-c clientaddr] "
+               "[-t transport]\n", progname);
+}
+/* Parse options, load <argv[0]>_kern.o, then run as server or client. */
+int main(int argc, char **argv)
+{
+       char *serveraddr = NULL, *clientaddr = NULL;
+       char filename[256];
+       int opt;
+
+       while ((opt = getopt(argc, argv, "s:c:t:")) != -1) {
+               switch (opt) {
+               case 's':
+                       serveraddr = optarg;
+                       break;
+               case 'c':
+                       clientaddr = optarg;
+                       break;
+               case 't':
+                       transport = str2trans(optarg);
+                       if (transport == RDS_TRANS_NONE) {
+                               fprintf(stderr,
+                                       "unknown transport %s\n", optarg);
+                               usage(argv[0]);
+                               return (-1);
+                       }
+                       break;
+               default:
+                       usage(argv[0]);
+                       return 1;
+               }
+       }
+       if (!serveraddr) {      /* both modes need -s; do not exit silently */
+               usage(argv[0]);
+               return 1;
+       }
+
+       snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+       if (load_bpf_file(filename)) {
+               fprintf(stderr, "Error: load_bpf_file %s", bpf_log_buf);
+               return 1;
+       }
+
+       if (clientaddr) {
+               client(clientaddr, serveraddr, TESTPORT);
+       } else {
+               printf("running server in a loop\n");
+               server(serveraddr, TESTPORT);
+       }
+       return 0;
+}
-- 
1.8.3.1

Reply via email to