tracex1_kern.c - C program compiled into BPF.
It attaches to kprobe:__netif_receive_skb_core.
When skb->dev->name == "lo", it prints a sample debug message into trace_pipe
via the bpf_trace_printk() helper function.

tracex1_user.c - corresponding user space component that:
- loads bpf program via bpf() syscall
- opens kprobes:__netif_receive_skb_core event via perf_event_open() syscall
- attaches the program to event via
  ioctl(event_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
- prints from trace_pipe

Note, this bpf program is completely non-portable. It must be recompiled
with current kernel headers. kprobe is not a stable ABI and bpf+kprobe scripts
may stop working any time.

The bpf verifier will detect that the program is using bpf_trace_printk() and
the kernel will print a warning banner:
** trace_printk() being used. Allocating extra memory.  **
**                                                      **
** This means that this is a DEBUG kernel and it is     **
** unsafe for production use.                           **

bpf_trace_printk() should be used for debugging of bpf program only.

Usage:
$ sudo tracex1
            ping-19826 [000] d.s2 63103.382648: : skb ffff880466b1ca00 len 84
            ping-19826 [000] d.s2 63103.382684: : skb ffff880466b1d300 len 84

            ping-19826 [000] d.s2 63104.382533: : skb ffff880466b1ca00 len 84
            ping-19826 [000] d.s2 63104.382594: : skb ffff880466b1d300 len 84

Signed-off-by: Alexei Starovoitov <a...@plumgrid.com>
---
 samples/bpf/Makefile        |    4 ++
 samples/bpf/bpf_helpers.h   |    6 +++
 samples/bpf/bpf_load.c      |  125 ++++++++++++++++++++++++++++++++++++++++---
 samples/bpf/bpf_load.h      |    3 ++
 samples/bpf/libbpf.c        |   14 ++++-
 samples/bpf/libbpf.h        |    5 +-
 samples/bpf/sock_example.c  |    2 +-
 samples/bpf/test_verifier.c |    2 +-
 samples/bpf/tracex1_kern.c  |   50 +++++++++++++++++
 samples/bpf/tracex1_user.c  |   25 +++++++++
 10 files changed, 224 insertions(+), 12 deletions(-)
 create mode 100644 samples/bpf/tracex1_kern.c
 create mode 100644 samples/bpf/tracex1_user.c

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index b5b3600dcdf5..51f6f01e5a3a 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -6,23 +6,27 @@ hostprogs-y := test_verifier test_maps
 hostprogs-y += sock_example
 hostprogs-y += sockex1
 hostprogs-y += sockex2
+hostprogs-y += tracex1
 
 test_verifier-objs := test_verifier.o libbpf.o
 test_maps-objs := test_maps.o libbpf.o
 sock_example-objs := sock_example.o libbpf.o
 sockex1-objs := bpf_load.o libbpf.o sockex1_user.o
 sockex2-objs := bpf_load.o libbpf.o sockex2_user.o
+tracex1-objs := bpf_load.o libbpf.o tracex1_user.o
 
 # Tell kbuild to always build the programs
 always := $(hostprogs-y)
 always += sockex1_kern.o
 always += sockex2_kern.o
+always += tracex1_kern.o
 
 HOSTCFLAGS += -I$(objtree)/usr/include
 
 HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable
 HOSTLOADLIBES_sockex1 += -lelf
 HOSTLOADLIBES_sockex2 += -lelf
+HOSTLOADLIBES_tracex1 += -lelf
 
 # point this to your LLVM backend with bpf support
 LLC=$(srctree)/tools/bpf/llvm/bld/Debug+Asserts/bin/llc
diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h
index ca0333146006..1c872bcf5a80 100644
--- a/samples/bpf/bpf_helpers.h
+++ b/samples/bpf/bpf_helpers.h
@@ -15,6 +15,12 @@ static int (*bpf_map_update_elem)(void *map, void *key, void *value,
        (void *) BPF_FUNC_map_update_elem;
 static int (*bpf_map_delete_elem)(void *map, void *key) =
        (void *) BPF_FUNC_map_delete_elem;
+static int (*bpf_probe_read)(void *dst, int size, void *unsafe_ptr) =
+       (void *) BPF_FUNC_probe_read; /* copy size bytes: unsafe_ptr -> dst */
+static unsigned long long (*bpf_ktime_get_ns)(void) =
+       (void *) BPF_FUNC_ktime_get_ns; /* presumably time in ns - confirm */
+static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) =
+       (void *) BPF_FUNC_trace_printk; /* debug print into trace_pipe */
 
 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
index 1831d236382b..95c106e4bcdb 100644
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c
@@ -8,29 +8,70 @@
 #include <unistd.h>
 #include <string.h>
 #include <stdbool.h>
+#include <stdlib.h>
 #include <linux/bpf.h>
 #include <linux/filter.h>
+#include <linux/perf_event.h>
+#include <sys/syscall.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <poll.h>
 #include "libbpf.h"
 #include "bpf_helpers.h"
 #include "bpf_load.h"
 
+#define DEBUGFS "/sys/kernel/debug/tracing/"
+
 static char license[128];
+static int kern_version;
 static bool processed_sec[128];
 int map_fd[MAX_MAPS];
 int prog_fd[MAX_PROGS];
+int event_fd[MAX_PROGS];
 int prog_cnt;
 
 static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
 {
-       int fd;
        bool is_socket = strncmp(event, "socket", 6) == 0;
-
-       if (!is_socket)
-               /* tracing events tbd */
+       bool is_kprobe = strncmp(event, "kprobe/", 7) == 0;
+       bool is_kretprobe = strncmp(event, "kretprobe/", 10) == 0;
+       enum bpf_prog_type prog_type;
+       char buf[256];
+       int fd, efd, err, id;
+       struct perf_event_attr attr = {};
+
+       attr.type = PERF_TYPE_TRACEPOINT;
+       attr.sample_type = PERF_SAMPLE_RAW;
+       attr.sample_period = 1;
+       attr.wakeup_events = 1;
+
+       if (is_socket) {
+               prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+       } else if (is_kprobe || is_kretprobe) {
+               prog_type = BPF_PROG_TYPE_KPROBE;
+       } else {
+               printf("Unknown event '%s'\n", event);
                return -1;
+       }
+
+       if (is_kprobe || is_kretprobe) {
+               if (is_kprobe)
+                       event += 7;
+               else
+                       event += 10;
+
+               snprintf(buf, sizeof(buf),
+                        "echo '%c:%s %s' >> /sys/kernel/debug/tracing/kprobe_events",
+                        is_kprobe ? 'p' : 'r', event, event);
+               err = system(buf);
+               if (err < 0) {
+                       printf("failed to create kprobe '%s' error '%s'\n",
+                              event, strerror(errno));
+                       return -1;
+               }
+       }
 
-       fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER,
-                          prog, size, license);
+       fd = bpf_prog_load(prog_type, prog, size, license, kern_version);
 
        if (fd < 0) {
                printf("bpf_prog_load() err=%d\n%s", errno, bpf_log_buf);
@@ -39,6 +80,41 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
 
        prog_fd[prog_cnt++] = fd;
 
+       if (is_socket)
+               return 0;
+
+       strcpy(buf, DEBUGFS);
+       strcat(buf, "events/kprobes/");
+       strcat(buf, event);
+       strcat(buf, "/id");
+
+       efd = open(buf, O_RDONLY, 0);
+       if (efd < 0) {
+               printf("failed to open event %s\n", event);
+               return -1;
+       }
+
+       err = read(efd, buf, sizeof(buf));
+       if (err < 0 || err >= sizeof(buf)) {
+               printf("read from '%s' failed '%s'\n", event, strerror(errno));
+               return -1;
+       }
+
+       close(efd);
+
+       buf[err] = 0;
+       id = atoi(buf);
+       attr.config = id;
+
+       efd = perf_event_open(&attr, -1/*pid*/, 0/*cpu*/, -1/*group_fd*/, 0);
+       if (efd < 0) {
+               printf("event %d fd %d err %s\n", id, efd, strerror(errno));
+               return -1;
+       }
+       event_fd[prog_cnt - 1] = efd;
+       ioctl(efd, PERF_EVENT_IOC_ENABLE, 0);
+       ioctl(efd, PERF_EVENT_IOC_SET_BPF, fd);
+
        return 0;
 }
 
@@ -135,6 +211,9 @@ int load_bpf_file(char *path)
        if (gelf_getehdr(elf, &ehdr) != &ehdr)
                return 1;
 
+       /* clear all kprobes */
+       i = system("echo \"\" > /sys/kernel/debug/tracing/kprobe_events");
+
        /* scan over all elf sections to get license and map info */
        for (i = 1; i < ehdr.e_shnum; i++) {
 
@@ -149,6 +228,14 @@ int load_bpf_file(char *path)
                if (strcmp(shname, "license") == 0) {
                        processed_sec[i] = true;
                        memcpy(license, data->d_buf, data->d_size);
+               } else if (strcmp(shname, "version") == 0) {
+                       processed_sec[i] = true;
+                       if (data->d_size != sizeof(int)) {
+                               printf("invalid size of version section %zd\n",
+                                      data->d_size);
+                               return 1;
+                       }
+                       memcpy(&kern_version, data->d_buf, sizeof(int));
                } else if (strcmp(shname, "maps") == 0) {
                        processed_sec[i] = true;
                        if (load_maps(data->d_buf, data->d_size))
@@ -178,7 +265,8 @@ int load_bpf_file(char *path)
                        if (parse_relo_and_apply(data, symbols, &shdr, insns))
                                continue;
 
-                       if (memcmp(shname_prog, "events/", 7) == 0 ||
+                       if (memcmp(shname_prog, "kprobe/", 7) == 0 ||
+                           memcmp(shname_prog, "kretprobe/", 10) == 0 ||
                            memcmp(shname_prog, "socket", 6) == 0)
                                load_and_attach(shname_prog, insns, data_prog->d_size);
                }
@@ -193,7 +281,8 @@ int load_bpf_file(char *path)
                if (get_sec(elf, i, &ehdr, &shname, &shdr, &data))
                        continue;
 
-               if (memcmp(shname, "events/", 7) == 0 ||
+               if (memcmp(shname, "kprobe/", 7) == 0 ||
+                   memcmp(shname, "kretprobe/", 10) == 0 ||
                    memcmp(shname, "socket", 6) == 0)
                        load_and_attach(shname, data->d_buf, data->d_size);
        }
@@ -201,3 +290,23 @@ int load_bpf_file(char *path)
        close(fd);
        return 0;
 }
+
+void read_trace_pipe(void)
+{
+       /* Copy trace_pipe to stdout forever; returns only if open() fails. */
+       int trace_fd = open(DEBUGFS "trace_pipe", O_RDONLY, 0);
+
+       if (trace_fd < 0)
+               return;
+       while (1) {
+               static char buf[4096];
+               ssize_t sz;
+
+               /* keep one byte for the NUL; skip errors (-1) and EOF (0) */
+               sz = read(trace_fd, buf, sizeof(buf) - 1);
+               if (sz > 0) {
+                       buf[sz] = 0;
+                       puts(buf);
+               }
+       }
+}
diff --git a/samples/bpf/bpf_load.h b/samples/bpf/bpf_load.h
index 27789a34f5e6..cbd7c2b532b9 100644
--- a/samples/bpf/bpf_load.h
+++ b/samples/bpf/bpf_load.h
@@ -6,6 +6,7 @@
 
 extern int map_fd[MAX_MAPS];
 extern int prog_fd[MAX_PROGS];
+extern int event_fd[MAX_PROGS];
 
 /* parses elf file compiled by llvm .c->.o
  * . parses 'maps' section and creates maps via BPF syscall
@@ -21,4 +22,6 @@ extern int prog_fd[MAX_PROGS];
  */
 int load_bpf_file(char *path);
 
+void read_trace_pipe(void);
+
 #endif
diff --git a/samples/bpf/libbpf.c b/samples/bpf/libbpf.c
index 46d50b7ddf79..7e1efa7e2ed7 100644
--- a/samples/bpf/libbpf.c
+++ b/samples/bpf/libbpf.c
@@ -81,7 +81,7 @@ char bpf_log_buf[LOG_BUF_SIZE];
 
 int bpf_prog_load(enum bpf_prog_type prog_type,
                  const struct bpf_insn *insns, int prog_len,
-                 const char *license)
+                 const char *license, int kern_version)
 {
        union bpf_attr attr = {
                .prog_type = prog_type,
@@ -93,6 +93,11 @@ int bpf_prog_load(enum bpf_prog_type prog_type,
                .log_level = 1,
        };
 
+       /* assign one field outside of struct init to make sure any
+        * padding is zero initialized
+        */
+       attr.kern_version = kern_version;
+
        bpf_log_buf[0] = 0;
 
        return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
@@ -121,3 +126,10 @@ int open_raw_sock(const char *name)
 
        return sock;
 }
+
+int perf_event_open(struct perf_event_attr *attr, int pid, int cpu,
+                   int group_fd, unsigned long flags)
+{
+       return syscall(__NR_perf_event_open, attr, pid, cpu,
+                      group_fd, flags); /* raw syscall, args passed as-is */
+}
diff --git a/samples/bpf/libbpf.h b/samples/bpf/libbpf.h
index 58c5fe1bdba1..ac7b09672b46 100644
--- a/samples/bpf/libbpf.h
+++ b/samples/bpf/libbpf.h
@@ -13,7 +13,7 @@ int bpf_get_next_key(int fd, void *key, void *next_key);
 
 int bpf_prog_load(enum bpf_prog_type prog_type,
                  const struct bpf_insn *insns, int insn_len,
-                 const char *license);
+                 const char *license, int kern_version);
 
 #define LOG_BUF_SIZE 65536
 extern char bpf_log_buf[LOG_BUF_SIZE];
@@ -182,4 +182,7 @@ extern char bpf_log_buf[LOG_BUF_SIZE];
 /* create RAW socket and bind to interface 'name' */
 int open_raw_sock(const char *name);
 
+struct perf_event_attr;
+int perf_event_open(struct perf_event_attr *attr, int pid, int cpu,
+                   int group_fd, unsigned long flags);
 #endif
diff --git a/samples/bpf/sock_example.c b/samples/bpf/sock_example.c
index c8ad0404416f..a0ce251c5390 100644
--- a/samples/bpf/sock_example.c
+++ b/samples/bpf/sock_example.c
@@ -56,7 +56,7 @@ static int test_sock(void)
        };
 
        prog_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, prog, sizeof(prog),
-                               "GPL");
+                               "GPL", 0);
        if (prog_fd < 0) {
                printf("failed to load prog '%s'\n", strerror(errno));
                goto cleanup;
diff --git a/samples/bpf/test_verifier.c b/samples/bpf/test_verifier.c
index b96175e90363..740ce97cda5e 100644
--- a/samples/bpf/test_verifier.c
+++ b/samples/bpf/test_verifier.c
@@ -689,7 +689,7 @@ static int test(void)
 
                prog_fd = bpf_prog_load(BPF_PROG_TYPE_UNSPEC, prog,
                                        prog_len * sizeof(struct bpf_insn),
-                                       "GPL");
+                                       "GPL", 0);
 
                if (tests[i].result == ACCEPT) {
                        if (prog_fd < 0) {
diff --git a/samples/bpf/tracex1_kern.c b/samples/bpf/tracex1_kern.c
new file mode 100644
index 000000000000..42176fce4847
--- /dev/null
+++ b/samples/bpf/tracex1_kern.c
@@ -0,0 +1,50 @@
+/* Copyright (c) 2013-2015 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <uapi/linux/bpf.h>
+#include <linux/version.h>
+#include "bpf_helpers.h"
+
+#define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;})
+
+/* kprobe is NOT a stable ABI
+ * kernel functions can be removed, renamed or completely change semantics.
+ * Number of arguments and their positions can change, etc.
+ * This bpf+kprobe example can stop working any time.
+ */
+SEC("kprobe/__netif_receive_skb_core")
+int bpf_prog1(struct pt_regs *ctx)
+{
+       /* attaches to kprobe __netif_receive_skb_core,
+        * looks for packets on loopback device and prints them
+        */
+       char devname[IFNAMSIZ] = {};
+       struct net_device *dev;
+       struct sk_buff *skb;
+       int len;
+
+       /* non-portable! works for the given kernel only */
+       skb = (struct sk_buff *) ctx->di;
+       /* _() reads each field through bpf_probe_read() */
+       dev = _(skb->dev);
+
+       len = _(skb->len);
+       /* copy the device name into the local buffer before inspecting it */
+       bpf_probe_read(devname, sizeof(devname), dev->name);
+       /* prefix check only: any device name starting with "lo" matches */
+       if (devname[0] == 'l' && devname[1] == 'o') {
+               char fmt[] = "skb %p len %d\n";
+               /* using bpf_trace_printk() for DEBUG ONLY */
+               bpf_trace_printk(fmt, sizeof(fmt), skb, len);
+       }
+
+       return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/tracex1_user.c b/samples/bpf/tracex1_user.c
new file mode 100644
index 000000000000..31a48183beea
--- /dev/null
+++ b/samples/bpf/tracex1_user.c
@@ -0,0 +1,25 @@
+#include <stdio.h>
+#include <linux/bpf.h>
+#include <unistd.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+
+int main(int ac, char **argv)
+{
+       FILE *f;
+       char filename[256];
+       /* object file name is derived from argv[0]: <argv[0]>_kern.o */
+       snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+       if (load_bpf_file(filename)) {
+               printf("%s", bpf_log_buf);
+               return 1;
+       }
+       /* ping on CPU 0 only: the perf event is opened with cpu == 0 */
+       f = popen("taskset 1 ping -c5 localhost", "r");
+       (void) f;
+       /* loops forever dumping trace_pipe unless it cannot be opened */
+       read_trace_pipe();
+
+       return 0;
+}
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to