This example samples the instruction pointer at a timed interval and keeps a frequency count of each sampled address in a BPF map. It is an example of summarizing sampled data in-kernel before passing it to user space. It uses the perf:perf_hrtimer tracepoint with perf_events sampling.
Example output: Sampling at 99 Hertz for 5 seconds. Ctrl-C also ends. ADDR KSYM COUNT 0xffffffff81257088 __fsnotify_parent 1 0x7f20b792d9b0 (user) 1 0xffffffff8121469e __vfs_read 1 0xffffffff81214afd rw_verify_area 1 0xffffffff8123327e __fget_light 1 0x7fc0965a6e2c (user) 1 0xffffffff81233c04 __fdget_pos 1 0xffffffff81378528 common_file_perm 1 0x404d90 (user) 1 0xffffffff81214c13 vfs_read 1 [...] 0xffffffff813d9e97 copy_user_enhanced_fast_string 3 0xffffffff817e310c _raw_spin_lock_irqsave 4 0xffffffff817e31a0 entry_SYSCALL_64_fastpath 4 0xffffffff814fb96c extract_crng 6 0xffffffff813d9e95 copy_user_enhanced_fast_string 7 0xffffffff814fb8a3 _extract_crng 7 0xffffffff817e2d55 _raw_spin_unlock_irqrestore 1399 0xffffffff8105fb46 native_safe_halt 2190 It also has basic options: USAGE: sampleip [-F freq] [duration] -F freq # sample frequency (Hertz), default 99 duration # sampling duration (seconds), default 5 Signed-off-by: Brendan Gregg <bgr...@netflix.com> Cc: Alexei Starovoitov <a...@kernel.org> Cc: Wang Nan <wangn...@huawei.com> --- samples/bpf/Makefile | 4 + samples/bpf/sampleip_kern.c | 48 +++++++++++ samples/bpf/sampleip_user.c | 189 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 241 insertions(+) create mode 100644 samples/bpf/sampleip_kern.c create mode 100644 samples/bpf/sampleip_user.c diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 90ebf7d..dc88a1e 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -24,6 +24,7 @@ hostprogs-y += test_overhead hostprogs-y += test_cgrp2_array_pin hostprogs-y += xdp1 hostprogs-y += xdp2 +hostprogs-y += sampleip test_verifier-objs := test_verifier.o libbpf.o test_maps-objs := test_maps.o libbpf.o @@ -49,6 +50,7 @@ test_cgrp2_array_pin-objs := libbpf.o test_cgrp2_array_pin.o xdp1-objs := bpf_load.o libbpf.o xdp1_user.o # reuse xdp1 source intentionally xdp2-objs := bpf_load.o libbpf.o xdp1_user.o +sampleip-objs := bpf_load.o libbpf.o sampleip_user.o # Tell kbuild to always build the 
programs always := $(hostprogs-y) @@ -74,6 +76,7 @@ always += parse_varlen.o parse_simple.o parse_ldabs.o always += test_cgrp2_tc_kern.o always += xdp1_kern.o always += xdp2_kern.o +always += sampleip_kern.o HOSTCFLAGS += -I$(objtree)/usr/include @@ -97,6 +100,7 @@ HOSTLOADLIBES_map_perf_test += -lelf -lrt HOSTLOADLIBES_test_overhead += -lelf -lrt HOSTLOADLIBES_xdp1 += -lelf HOSTLOADLIBES_xdp2 += -lelf +HOSTLOADLIBES_sampleip += -lelf # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline: # make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang diff --git a/samples/bpf/sampleip_kern.c b/samples/bpf/sampleip_kern.c new file mode 100644 index 0000000..afec3fe --- /dev/null +++ b/samples/bpf/sampleip_kern.c @@ -0,0 +1,48 @@ +/* Copyright 2016 Netflix, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. 
+ */ +#include <linux/version.h> +#include <uapi/linux/bpf.h> +#include <linux/ptrace.h> +#include "bpf_helpers.h" + +#define MAX_IPS 8192 + +#define _(P) ({typeof(P) val; bpf_probe_read(&val, sizeof(val), &P); val;}) + +struct bpf_map_def SEC("maps") ip_map = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(u64), + .value_size = sizeof(u32), + .max_entries = MAX_IPS, +}; + +/* from /sys/kernel/debug/tracing/events/perf/perf_hrtimer/format */ +struct perf_hrtimer_args { + unsigned long long pad; + struct pt_regs *regs; + struct perf_event *event; +}; +SEC("tracepoint/perf/perf_hrtimer") +int do_sample(struct perf_hrtimer_args *args) +{ + struct pt_regs *regs; + u64 ip; + u32 *value, init_val = 1; + + regs = _(args->regs); + ip = _(regs->ip); + value = bpf_map_lookup_elem(&ip_map, &ip); + if (value) + *value += 1; + else + /* E2BIG not tested for this example only */ + bpf_map_update_elem(&ip_map, &ip, &init_val, BPF_ANY); + + return 0; +} +char _license[] SEC("license") = "GPL"; +u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/samples/bpf/sampleip_user.c b/samples/bpf/sampleip_user.c new file mode 100644 index 0000000..da0727d --- /dev/null +++ b/samples/bpf/sampleip_user.c @@ -0,0 +1,189 @@ +/* + * sampleip: sample instruction pointer and frequency count in a BPF map. + * + * Copyright 2016 Netflix, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. 
+ */ +#include <stdio.h> +#include <stdlib.h> +#include <stdio.h> +#include <unistd.h> +#include <errno.h> +#include <signal.h> +#include <string.h> +#include <linux/perf_event.h> +#include <linux/ptrace.h> +#include <linux/bpf.h> +#include "libbpf.h" +#include "bpf_load.h" + +#define DEFAULT_FREQ 99 +#define DEFAULT_SECS 5 +#define MAX_IPS 8192 +#define PAGE_OFFSET 0xffff880000000000 + +static int nr_cpus; + +static void usage(void) +{ + printf("USAGE: sampleip [-F freq] [duration]\n"); + printf(" -F freq # sample frequency (Hertz), default 99\n"); + printf(" duration # sampling duration (seconds), default 5\n"); +} + +static int sampling_start(int *pmu_fd, int freq) +{ + int i; + + struct perf_event_attr pe_sample_attr = { + .type = PERF_TYPE_SOFTWARE, + .freq = 1, + .sample_period = freq, + }; + + for (i = 0; i < nr_cpus; i++) { + pmu_fd[i] = perf_event_open(&pe_sample_attr, -1 /* pid */, i, + -1 /* group_fd */, 0 /* flags */); + if (pmu_fd[i] < 0) { + fprintf(stderr, "ERROR: Initializing perf sampling\n"); + return 1; + } + } + + return 0; +} + +static void sampling_end(int *pmu_fd) +{ + int i; + + for (i = 0; i < nr_cpus; i++) + close(pmu_fd[i]); +} + +struct ipcount { + __u64 ip; + __u32 count; +}; + +/* used for sorting */ +struct ipcount counts[MAX_IPS]; + +static int count_cmp(const void *p1, const void *p2) +{ + return ((struct ipcount *)p1)->count - ((struct ipcount *)p2)->count; +} + +static void print_ip_map(int fd) +{ + struct ksym *sym; + __u64 key, next_key; + __u32 value; + int i, max; + + printf("%-19s %-32s %s\n", "ADDR", "KSYM", "COUNT"); + + /* fetch IPs and counts */ + key = 0, i = 0; + while (bpf_get_next_key(fd, &key, &next_key) == 0) { + bpf_lookup_elem(fd, &next_key, &value); + counts[i].ip = next_key; + counts[i++].count = value; + key = next_key; + } + max = i; + + /* sort and print */ + qsort(counts, max, sizeof(struct ipcount), count_cmp); + for (i = 0; i < max; i++) { + if (counts[i].ip > PAGE_OFFSET) { + sym = 
ksym_search(counts[i].ip); + printf("0x%-17llx %-32s %u\n", counts[i].ip, sym->name, + counts[i].count); + } else { + printf("0x%-17llx %-32s %u\n", counts[i].ip, "(user)", + counts[i].count); + } + } + + if (max == MAX_IPS) { + printf("WARNING: IP hash was full (max %d entries); ", max); + printf("may have dropped samples\n"); + } +} + +static void int_exit(int sig) +{ + printf("\n"); + print_ip_map(map_fd[0]); + exit(0); +} + +int main(int argc, char **argv) +{ + char filename[256]; + int *pmu_fd, opt, freq = DEFAULT_FREQ, secs = DEFAULT_SECS; + + /* process arguments */ + while ((opt = getopt(argc, argv, "F:h")) != -1) { + switch (opt) { + case 'F': + freq = atoi(optarg); + break; + case 'h': + default: + usage(); + return 0; + } + } + if (argc - optind == 1) + secs = atoi(argv[optind]); + if (freq == 0 || secs == 0) { + usage(); + return 1; + } + + /* initialize kernel symbol translation */ + if (load_kallsyms()) { + fprintf(stderr, "ERROR: loading /proc/kallsyms\n"); + return 2; + } + + /* create perf FDs for each CPU */ + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + pmu_fd = malloc(nr_cpus * sizeof(int)); + if (pmu_fd == NULL) { + fprintf(stderr, "ERROR: malloc of pmu_fd\n"); + return 1; + } + + /* load BPF program */ + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + if (load_bpf_file(filename)) { + fprintf(stderr, "ERROR: loading BPF program (errno %d):\n", + errno); + if (strcmp(bpf_log_buf, "") == 0) + fprintf(stderr, "Try: ulimit -l unlimited\n"); + else + fprintf(stderr, "%s", bpf_log_buf); + return 1; + } + signal(SIGINT, int_exit); + + /* do sampling */ + printf("Sampling at %d Hertz for %d seconds. Ctrl-C also ends.\n", + freq, secs); + if (sampling_start(pmu_fd, freq) != 0) + return 1; + sleep(secs); + sampling_end(pmu_fd); + free(pmu_fd); + + /* output sample counts */ + print_ip_map(map_fd[0]); + + return 0; +} -- 2.7.4