From: Teng Qin <qint...@fb.com>

$ trace_event

tests attaching a BPF program to HW_CPU_CYCLES, SW_CPU_CLOCK, HW_CACHE_L1D and
other events.
It runs 'dd' in the background while the BPF program collects user and kernel
stack traces on counter overflow.
User space expects to see sys_read and sys_write in the kernel stack traces.
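For reference, a minimal sketch of the attach path this sample exercises
(illustrative only: prog_fd stands for an already-loaded
BPF_PROG_TYPE_PERF_EVENT program, and the raw perf_event_open syscall is used
here in place of the sample's sys_perf_event_open wrapper from perf-sys.h):

    /* open a sampling cycles event on one CPU and attach a BPF program */
    #include <linux/perf_event.h>
    #include <sys/ioctl.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    static int attach_prog_to_cycles(int prog_fd, int cpu)
    {
            struct perf_event_attr attr = {
                    .type = PERF_TYPE_HARDWARE,
                    .config = PERF_COUNT_HW_CPU_CYCLES,
                    .freq = 1,
                    .sample_freq = 50,      /* ~50 overflows per second */
            };
            int fd = syscall(__NR_perf_event_open, &attr, -1 /*pid*/, cpu,
                             -1 /*group_fd*/, 0 /*flags*/);

            if (fd < 0)
                    return -1;
            if (ioctl(fd, PERF_EVENT_IOC_SET_BPF, prog_fd) ||
                the program attached above runs on every counter overflow */
                ioctl(fd, PERF_EVENT_IOC_ENABLE, 0)) {
                    close(fd);
                    return -1;
            }
            return fd;
    }

On each overflow the attached program runs and can record the user and kernel
stacks (the sample does this with bpf_get_stackid() into a stack map).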

$ tracex6

tests reading various perf counters from a BPF program.
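As a rough sketch of how the user-space side wires a counter into the
program's BPF_MAP_TYPE_PERF_EVENT_ARRAY (names are illustrative;
bpf_map_update_elem() is the wrapper from samples/bpf's libbpf.h, and map_fd
refers to the perf_event_array map):

    #include <linux/bpf.h>
    #include <linux/perf_event.h>
    #include <sys/ioctl.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include "libbpf.h"             /* bpf_map_update_elem() */

    static int wire_counter_for_cpu(int map_fd, int cpu)
    {
            struct perf_event_attr attr = {
                    .type = PERF_TYPE_HARDWARE,
                    .config = PERF_COUNT_HW_CPU_CYCLES,
                    /* huge period: count instead of sample */
                    .sample_period = 0x7fffffffffffffffULL,
            };
            int fd = syscall(__NR_perf_event_open, &attr, -1 /*pid*/, cpu,
                             -1 /*group_fd*/, 0 /*flags*/);

            if (fd < 0)
                    return -1;
            /* key == cpu so the program can index by smp_processor_id() */
            bpf_map_update_elem(map_fd, &cpu, &fd, BPF_ANY);
            ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
            return fd;
    }

The BPF program can then call bpf_perf_event_read() on the same map, keyed by
bpf_get_smp_processor_id(), to obtain the raw counter value.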

Both tests were refactored to increase coverage and be more accurate.

Signed-off-by: Teng Qin <qint...@fb.com>
Signed-off-by: Alexei Starovoitov <a...@kernel.org>
---
 samples/bpf/bpf_helpers.h      |   3 +-
 samples/bpf/trace_event_user.c |  73 ++++++++++++++---
 samples/bpf/tracex6_kern.c     |  28 +++++--
 samples/bpf/tracex6_user.c     | 180 ++++++++++++++++++++++++++++++++---------
 4 files changed, 228 insertions(+), 56 deletions(-)

diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h
index 9a9c95f2c9fb..51e567bc70fc 100644
--- a/samples/bpf/bpf_helpers.h
+++ b/samples/bpf/bpf_helpers.h
@@ -31,7 +31,8 @@ static unsigned long long (*bpf_get_current_uid_gid)(void) =
        (void *) BPF_FUNC_get_current_uid_gid;
 static int (*bpf_get_current_comm)(void *buf, int buf_size) =
        (void *) BPF_FUNC_get_current_comm;
-static int (*bpf_perf_event_read)(void *map, int index) =
+static unsigned long long (*bpf_perf_event_read)(void *map,
+                                                unsigned long long flags) =
        (void *) BPF_FUNC_perf_event_read;
 static int (*bpf_clone_redirect)(void *ctx, int ifindex, int flags) =
        (void *) BPF_FUNC_clone_redirect;
diff --git a/samples/bpf/trace_event_user.c b/samples/bpf/trace_event_user.c
index fa4336423da5..7bd827b84a67 100644
--- a/samples/bpf/trace_event_user.c
+++ b/samples/bpf/trace_event_user.c
@@ -75,7 +75,10 @@ static void print_stack(struct key_t *key, __u64 count)
                for (i = PERF_MAX_STACK_DEPTH - 1; i >= 0; i--)
                        print_addr(ip[i]);
        }
-       printf("\n");
+       if (count < 6)
+               printf("\r");
+       else
+               printf("\n");
 
        if (key->kernstack == -EEXIST && !warned) {
                printf("stackmap collisions seen. Consider increasing size\n");
@@ -105,7 +108,7 @@ static void print_stacks(void)
                bpf_map_delete_elem(fd, &next_key);
                key = next_key;
        }
-
+       printf("\n");
        if (!sys_read_seen || !sys_write_seen) {
                printf("BUG kernel stack doesn't contain sys_read() and 
sys_write()\n");
                int_exit(0);
@@ -122,24 +125,29 @@ static void test_perf_event_all_cpu(struct perf_event_attr *attr)
 {
        int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
        int *pmu_fd = malloc(nr_cpus * sizeof(int));
-       int i;
+       int i, error = 0;
 
        /* open perf_event on all cpus */
        for (i = 0; i < nr_cpus; i++) {
                pmu_fd[i] = sys_perf_event_open(attr, -1, i, -1, 0);
                if (pmu_fd[i] < 0) {
                        printf("sys_perf_event_open failed\n");
+                       error = 1;
                        goto all_cpu_err;
                }
                assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_SET_BPF, prog_fd[0]) == 0);
-               assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE, 0) == 0);
+               assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE) == 0);
        }
-       system("dd if=/dev/zero of=/dev/null count=5000k");
+       system("dd if=/dev/zero of=/dev/null count=5000k status=none");
        print_stacks();
 all_cpu_err:
-       for (i--; i >= 0; i--)
+       for (i--; i >= 0; i--) {
+               ioctl(pmu_fd[i], PERF_EVENT_IOC_DISABLE);
                close(pmu_fd[i]);
+       }
        free(pmu_fd);
+       if (error)
+               int_exit(0);
 }
 
 static void test_perf_event_task(struct perf_event_attr *attr)
@@ -150,12 +158,13 @@ static void test_perf_event_task(struct perf_event_attr *attr)
        pmu_fd = sys_perf_event_open(attr, 0, -1, -1, 0);
        if (pmu_fd < 0) {
                printf("sys_perf_event_open failed\n");
-               return;
+               int_exit(0);
        }
        assert(ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd[0]) == 0);
-       assert(ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0) == 0);
-       system("dd if=/dev/zero of=/dev/null count=5000k");
+       assert(ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE) == 0);
+       system("dd if=/dev/zero of=/dev/null count=5000k status=none");
        print_stacks();
+       ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
        close(pmu_fd);
 }
 
@@ -175,11 +184,56 @@ static void test_bpf_perf_event(void)
                .config = PERF_COUNT_SW_CPU_CLOCK,
                .inherit = 1,
        };
+       struct perf_event_attr attr_hw_cache_l1d = {
+               .sample_freq = SAMPLE_FREQ,
+               .freq = 1,
+               .type = PERF_TYPE_HW_CACHE,
+               .config =
+                       PERF_COUNT_HW_CACHE_L1D |
+                       (PERF_COUNT_HW_CACHE_OP_READ << 8) |
+                       (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16),
+               .inherit = 1,
+       };
+       struct perf_event_attr attr_hw_cache_branch_miss = {
+               .sample_freq = SAMPLE_FREQ,
+               .freq = 1,
+               .type = PERF_TYPE_HW_CACHE,
+               .config =
+                       PERF_COUNT_HW_CACHE_BPU |
+                       (PERF_COUNT_HW_CACHE_OP_READ << 8) |
+                       (PERF_COUNT_HW_CACHE_RESULT_MISS << 16),
+               .inherit = 1,
+       };
+       struct perf_event_attr attr_type_raw = {
+               .sample_freq = SAMPLE_FREQ,
+               .freq = 1,
+               .type = PERF_TYPE_RAW,
+               /* Intel Instruction Retired */
+               .config = 0xc0,
+               .inherit = 1,
+       };
 
+       printf("Test HW_CPU_CYCLES\n");
        test_perf_event_all_cpu(&attr_type_hw);
        test_perf_event_task(&attr_type_hw);
+
+       printf("Test SW_CPU_CLOCK\n");
        test_perf_event_all_cpu(&attr_type_sw);
        test_perf_event_task(&attr_type_sw);
+
+       printf("Test HW_CACHE_L1D\n");
+       test_perf_event_all_cpu(&attr_hw_cache_l1d);
+       test_perf_event_task(&attr_hw_cache_l1d);
+
+       printf("Test HW_CACHE_BPU\n");
+       test_perf_event_all_cpu(&attr_hw_cache_branch_miss);
+       test_perf_event_task(&attr_hw_cache_branch_miss);
+
+       printf("Test Instruction Retired\n");
+       test_perf_event_all_cpu(&attr_type_raw);
+       test_perf_event_task(&attr_type_raw);
+
+       printf("*** PASS ***\n");
 }
 
 
@@ -209,7 +263,6 @@ int main(int argc, char **argv)
                return 0;
        }
        test_bpf_perf_event();
-
        int_exit(0);
        return 0;
 }
diff --git a/samples/bpf/tracex6_kern.c b/samples/bpf/tracex6_kern.c
index be479c4af9e2..8a7d0f977625 100644
--- a/samples/bpf/tracex6_kern.c
+++ b/samples/bpf/tracex6_kern.c
@@ -3,22 +3,36 @@
 #include <uapi/linux/bpf.h>
 #include "bpf_helpers.h"
 
-struct bpf_map_def SEC("maps") my_map = {
+struct bpf_map_def SEC("maps") counters = {
        .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
        .key_size = sizeof(int),
        .value_size = sizeof(u32),
-       .max_entries = 32,
+       .max_entries = 64,
+};
+struct bpf_map_def SEC("maps") values = {
+       .type = BPF_MAP_TYPE_HASH,
+       .key_size = sizeof(int),
+       .value_size = sizeof(u64),
+       .max_entries = 64,
 };
 
-SEC("kprobe/sys_write")
+SEC("kprobe/htab_map_get_next_key")
 int bpf_prog1(struct pt_regs *ctx)
 {
-       u64 count;
        u32 key = bpf_get_smp_processor_id();
-       char fmt[] = "CPU-%d   %llu\n";
+       u64 count, *val;
+       s64 error;
+
+       count = bpf_perf_event_read(&counters, key);
+       error = (s64)count;
+       if (error <= -2 && error >= -95)
+               return 0;
 
-       count = bpf_perf_event_read(&my_map, key);
-       bpf_trace_printk(fmt, sizeof(fmt), key, count);
+       val = bpf_map_lookup_elem(&values, &key);
+       if (val)
+               *val = count;
+       else
+               bpf_map_update_elem(&values, &key, &count, BPF_NOEXIST);
 
        return 0;
 }
diff --git a/samples/bpf/tracex6_user.c b/samples/bpf/tracex6_user.c
index ca7874ed77f4..a05a99a0752f 100644
--- a/samples/bpf/tracex6_user.c
+++ b/samples/bpf/tracex6_user.c
@@ -1,73 +1,177 @@
-#include <stdio.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <stdbool.h>
-#include <string.h>
+#define _GNU_SOURCE
+
+#include <assert.h>
 #include <fcntl.h>
-#include <poll.h>
-#include <sys/ioctl.h>
 #include <linux/perf_event.h>
 #include <linux/bpf.h>
-#include "libbpf.h"
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <sys/resource.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
 #include "bpf_load.h"
+#include "libbpf.h"
 #include "perf-sys.h"
 
 #define SAMPLE_PERIOD  0x7fffffffffffffffULL
 
-static void test_bpf_perf_event(void)
+static void check_on_cpu(int cpu, struct perf_event_attr *attr)
 {
-       int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
-       int *pmu_fd = malloc(nr_cpus * sizeof(int));
-       int status, i;
+       int pmu_fd, error = 0;
+       cpu_set_t set;
+       __u64 value;
 
-       struct perf_event_attr attr_insn_pmu = {
+       /* Move to target CPU */
+       CPU_ZERO(&set);
+       CPU_SET(cpu, &set);
+       assert(sched_setaffinity(0, sizeof(set), &set) == 0);
+       /* Open perf event and attach to the perf_event_array */
+       pmu_fd = sys_perf_event_open(attr, -1/*pid*/, cpu/*cpu*/, -1/*group_fd*/, 0);
+       if (pmu_fd < 0) {
+               fprintf(stderr, "sys_perf_event_open failed on CPU %d\n", cpu);
+               error = 1;
+               goto on_exit;
+       }
+       assert(bpf_map_update_elem(map_fd[0], &cpu, &pmu_fd, BPF_ANY) == 0);
+       assert(ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0) == 0);
+       /* Trigger the kprobe */
+       bpf_map_get_next_key(map_fd[1], &cpu, NULL);
+       /* Check the value */
+       if (bpf_map_lookup_elem(map_fd[1], &cpu, &value)) {
+               fprintf(stderr, "Value missing for CPU %d\n", cpu);
+               error = 1;
+               goto on_exit;
+       }
+       fprintf(stderr, "CPU %d: %llu\n", cpu, value);
+
+on_exit:
+       assert(bpf_map_delete_elem(map_fd[0], &cpu) == 0 || error);
+       assert(ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE, 0) == 0 || error);
+       assert(close(pmu_fd) == 0 || error);
+       assert(bpf_map_delete_elem(map_fd[1], &cpu) == 0 || error);
+       exit(error);
+}
+
+static void test_perf_event_array(struct perf_event_attr *attr,
+                                 const char *name)
+{
+       int i, status, nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+       pid_t pid[nr_cpus];
+       int err = 0;
+
+       printf("Test reading %s counters\n", name);
+
+       for (i = 0; i < nr_cpus; i++) {
+               pid[i] = fork();
+               assert(pid[i] >= 0);
+               if (pid[i] == 0) {
+                       check_on_cpu(i, attr);
+                       exit(1);
+               }
+       }
+
+       for (i = 0; i < nr_cpus; i++) {
+               assert(waitpid(pid[i], &status, 0) == pid[i]);
+               err |= status;
+       }
+
+       if (err)
+               printf("Test: %s FAILED\n", name);
+}
+
+static void test_bpf_perf_event(void)
+{
+       struct perf_event_attr attr_cycles = {
                .freq = 0,
                .sample_period = SAMPLE_PERIOD,
                .inherit = 0,
                .type = PERF_TYPE_HARDWARE,
                .read_format = 0,
                .sample_type = 0,
-               .config = 0,/* PMU: cycles */
+               .config = PERF_COUNT_HW_CPU_CYCLES,
+       };
+       struct perf_event_attr attr_clock = {
+               .freq = 0,
+               .sample_period = SAMPLE_PERIOD,
+               .inherit = 0,
+               .type = PERF_TYPE_SOFTWARE,
+               .read_format = 0,
+               .sample_type = 0,
+               .config = PERF_COUNT_SW_CPU_CLOCK,
+       };
+       struct perf_event_attr attr_raw = {
+               .freq = 0,
+               .sample_period = SAMPLE_PERIOD,
+               .inherit = 0,
+               .type = PERF_TYPE_RAW,
+               .read_format = 0,
+               .sample_type = 0,
+               /* Intel Instruction Retired */
+               .config = 0xc0,
+       };
+       struct perf_event_attr attr_l1d_load = {
+               .freq = 0,
+               .sample_period = SAMPLE_PERIOD,
+               .inherit = 0,
+               .type = PERF_TYPE_HW_CACHE,
+               .read_format = 0,
+               .sample_type = 0,
+               .config =
+                       PERF_COUNT_HW_CACHE_L1D |
+                       (PERF_COUNT_HW_CACHE_OP_READ << 8) |
+                       (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16),
+       };
+       struct perf_event_attr attr_llc_miss = {
+               .freq = 0,
+               .sample_period = SAMPLE_PERIOD,
+               .inherit = 0,
+               .type = PERF_TYPE_HW_CACHE,
+               .read_format = 0,
+               .sample_type = 0,
+               .config =
+                       PERF_COUNT_HW_CACHE_LL |
+                       (PERF_COUNT_HW_CACHE_OP_READ << 8) |
+                       (PERF_COUNT_HW_CACHE_RESULT_MISS << 16),
+       };
+       struct perf_event_attr attr_msr_tsc = {
+               .freq = 0,
+               .sample_period = 0,
+               .inherit = 0,
+               /* From /sys/bus/event_source/devices/msr/ */
+               .type = 7,
+               .read_format = 0,
+               .sample_type = 0,
+               .config = 0,
        };
 
-       for (i = 0; i < nr_cpus; i++) {
-               pmu_fd[i] = sys_perf_event_open(&attr_insn_pmu, -1/*pid*/, i/*cpu*/, -1/*group_fd*/, 0);
-               if (pmu_fd[i] < 0) {
-                       printf("event syscall failed\n");
-                       goto exit;
-               }
-
-               bpf_map_update_elem(map_fd[0], &i, &pmu_fd[i], BPF_ANY);
-               ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE, 0);
-       }
+       test_perf_event_array(&attr_cycles, "HARDWARE-cycles");
+       test_perf_event_array(&attr_clock, "SOFTWARE-clock");
+       test_perf_event_array(&attr_raw, "RAW-instruction-retired");
+       test_perf_event_array(&attr_l1d_load, "HW_CACHE-L1D-load");
 
-       status = system("ls > /dev/null");
-       if (status)
-               goto exit;
-       status = system("sleep 2");
-       if (status)
-               goto exit;
-
-exit:
-       for (i = 0; i < nr_cpus; i++)
-               close(pmu_fd[i]);
-       close(map_fd[0]);
-       free(pmu_fd);
+       /* below tests may fail in qemu */
+       test_perf_event_array(&attr_llc_miss, "HW_CACHE-LLC-miss");
+       test_perf_event_array(&attr_msr_tsc, "Dynamic-msr-tsc");
 }
 
 int main(int argc, char **argv)
 {
+       struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
        char filename[256];
 
        snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
 
+       setrlimit(RLIMIT_MEMLOCK, &r);
        if (load_bpf_file(filename)) {
                printf("%s", bpf_log_buf);
                return 1;
        }
 
        test_bpf_perf_event();
-       read_trace_pipe();
-
        return 0;
 }
-- 
2.9.3