On 4/22/18 5:23 PM, Alexei Starovoitov wrote:
On Fri, Apr 20, 2018 at 03:18:41PM -0700, Yonghong Song wrote:
The test attached a raw_tracepoint program to sched/sched_switch.
It tested to get stack for user space, kernel space and user
space with build_id request. It also tested to get user
and kernel stack into the same buffer with back-to-back
bpf_get_stack helper calls.

Whenever the kernel stack is available, the user space
application will check to ensure that the kernel function
for raw_tracepoint ___bpf_prog_run is part of the stack.

Signed-off-by: Yonghong Song <y...@fb.com>
---
  tools/testing/selftests/bpf/Makefile               |   3 +-
  tools/testing/selftests/bpf/test_get_stack_rawtp.c | 102 ++++++++++++++++++
  tools/testing/selftests/bpf/test_progs.c           | 115 +++++++++++++++++++++
  3 files changed, 219 insertions(+), 1 deletion(-)
  create mode 100644 tools/testing/selftests/bpf/test_get_stack_rawtp.c

diff --git a/tools/testing/selftests/bpf/Makefile 
b/tools/testing/selftests/bpf/Makefile
index 0b72cc7..54e9e74 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -32,7 +32,7 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o 
test_tcp_estats.o test
        test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \
        sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \
        sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o 
test_adjust_tail.o \
-       test_btf_haskv.o test_btf_nokv.o
+       test_btf_haskv.o test_btf_nokv.o test_get_stack_rawtp.o
# Order correspond to 'make run_tests' order
  TEST_PROGS := test_kmod.sh \
@@ -56,6 +56,7 @@ $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/libbpf.a
  $(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
  $(OUTPUT)/test_sock: cgroup_helpers.c
  $(OUTPUT)/test_sock_addr: cgroup_helpers.c
+$(OUTPUT)/test_progs: trace_helpers.c
.PHONY: force diff --git a/tools/testing/selftests/bpf/test_get_stack_rawtp.c b/tools/testing/selftests/bpf/test_get_stack_rawtp.c
new file mode 100644
index 0000000..ba1dcf9
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_get_stack_rawtp.c
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+/* Permit pretty deep stack traces */
+#define MAX_STACK_RAWTP 100
+struct stack_trace_t {
+       int pid;
+       int kern_stack_size;
+       int user_stack_size;
+       int user_stack_buildid_size;
+       __u64 kern_stack[MAX_STACK_RAWTP];
+       __u64 user_stack[MAX_STACK_RAWTP];
+       struct bpf_stack_build_id user_stack_buildid[MAX_STACK_RAWTP];
+};
+
+struct bpf_map_def SEC("maps") perfmap = {
+       .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+       .key_size = sizeof(int),
+       .value_size = sizeof(__u32),
+       .max_entries = 2,
+};
+
+struct bpf_map_def SEC("maps") stackdata_map = {
+       .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+       .key_size = sizeof(__u32),
+       .value_size = sizeof(struct stack_trace_t),
+       .max_entries = 1,
+};
+
+/* Allocate per-cpu space twice the needed. For the code below
+ *   usize = bpf_get_stack(ctx, raw_data, max_len, BPF_F_USER_STACK);
+ *   if (usize < 0)
+ *     return 0;
+ *   ksize = bpf_get_stack(ctx, raw_data + usize, max_len - usize, 0);
+ *
+ * If we have value_size = MAX_STACK_RAWTP * sizeof(__u64),
+ * verifier will complain that access "raw_data + usize"
+ * with size "max_len - usize" may be out of bound.
+ * The maximum "raw_data + usize" is "raw_data + max_len"
+ * and the maximum "max_len - usize" is "max_len", verifier
+ * concludes that the maximum buffer access range is
+ * "raw_data[0...max_len * 2 - 1]" and hence reject the program.
+ *
+ * Doubling the to-be-used max buffer size can fix this verifier
+ * issue and avoid complicated C programming massaging.
+ * This is an acceptable workaround since there is one entry here.
+ */
+struct bpf_map_def SEC("maps") rawdata_map = {
+       .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+       .key_size = sizeof(__u32),
+       .value_size = MAX_STACK_RAWTP * sizeof(__u64) * 2,
+       .max_entries = 1,
+};
+
+SEC("tracepoint/sched/sched_switch")
+int bpf_prog1(void *ctx)
+{
+       int max_len, max_buildid_len, usize, ksize, total_size;
+       struct stack_trace_t *data;
+       void *raw_data;
+       __u32 key = 0;
+
+       data = bpf_map_lookup_elem(&stackdata_map, &key);
+       if (!data)
+               return 0;
+
+       max_len = MAX_STACK_RAWTP * sizeof(__u64);
+       max_buildid_len = MAX_STACK_RAWTP * sizeof(struct bpf_stack_build_id);
+       data->pid = bpf_get_current_pid_tgid();
+       data->kern_stack_size = bpf_get_stack(ctx, data->kern_stack,
+                                             max_len, 0);
+       data->user_stack_size = bpf_get_stack(ctx, data->user_stack, max_len,
+                                           BPF_F_USER_STACK);
+       data->user_stack_buildid_size = bpf_get_stack(
+               ctx, data->user_stack_buildid, max_buildid_len,
+               BPF_F_USER_STACK | BPF_F_USER_BUILD_ID);
+       bpf_perf_event_output(ctx, &perfmap, 0, data, sizeof(*data));
+
+       /* write both kernel and user stacks to the same buffer */
+       raw_data = bpf_map_lookup_elem(&rawdata_map, &key);
+       if (!raw_data)
+               return 0;
+
+       usize = bpf_get_stack(ctx, raw_data, max_len, BPF_F_USER_STACK);
+       if (usize < 0)
+               return 0;
+
+       ksize = bpf_get_stack(ctx, raw_data + usize, max_len - usize, 0);
+       if (ksize < 0)

may be instead of teaching verifier about ARSH (which doesn't look
straighforward) such use case can be done as:
u32 max_len, usize, ksize;
ksize = bpf_get_stack(ctx, raw_data + usize, max_len - usize, 0);
if ((int)ksize < 0)

Just tried, it does not work. The compiler generates code like:

47: (b7) r9 = 800
48: (bf) r1 = r6
49: (bf) r2 = r7
50: (b7) r3 = 800
51: (b7) r4 = 256
52: (85) call bpf_get_stack#66
R0=map_value(id=0,off=0,ks=4,vs=1600,imm=0) R1_w=ctx(id=0,off=0,imm=0) R2_w=map_value(id=0,off=0,ks=4,vs=1600,imm=0) R3_w=inv800 R4_w=inv256 R6=ctx(id=0,off=0,imm=0) R7=map_value(id=0,off=0,ks=4,vs=1600,imm=0) R9_w=inv800 R10=fp0,call_-1
53: (b7) r1 = 0
54: (bf) r8 = r0
55: (67) r8 <<= 32
56: (c7) r8 s>>= 32
57: (6d) if r1 s> r8 goto pc+27
R0=inv(id=0,umax_value=800) R1=inv0 R6=ctx(id=0,off=0,imm=0) R7=map_value(id=0,off=0,ks=4,vs=1600,imm=0) R8=inv(id=0,umax_value=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff)) R9=inv800 R10=fp0,call_-1
58: (1f) r9 -= r8
59: (bf) r1 = r8
60: (67) r1 <<= 32
61: (77) r1 >>= 32
62: (bf) r2 = r7
63: (0f) r2 += r1
64: (bf) r1 = r6
65: (bf) r3 = r9
66: (b7) r4 = 0
67: (85) call bpf_get_stack#66
R3 min value is negative, either use unsigned or 'var &= const'

Basically, the compiler does lsh/arsh for "int" value to do the comparison and then get this value does a lsh/rsh.
So it looks like we still need arsh.


That's certainly suboptimal and very much non obvious to program
developers, but at least it can unblock the bpf_get_stack part
landing and proper ARSH support can be added later?
Just a thought.

+               return 0;
+
+       total_size = usize + ksize;
+       if (total_size > 0 && total_size <= max_len)
+               bpf_perf_event_output(ctx, &perfmap, 0, raw_data, total_size);
+
+       return 0;
+}

the rest of the test looks great. Thank you for adding such exhaustive test.

Reply via email to