Re: [bpf-next PATCH 3/3] bpf: add sample program to trace map events

2018-04-23 Thread Sebastiano Miano

On 20/04/2018 11:47, Jesper Dangaard Brouer wrote:

On Thu, 19 Apr 2018 17:27:37 -0700
Alexei Starovoitov <alexei.starovoi...@gmail.com> wrote:


On Wed, Apr 18, 2018 at 05:30:59PM +0200, Sebastiano Miano wrote:

This patch adds a sample program, called trace_map_events,
that shows how to capture map events and filter them based on
the map id.

...

+struct bpf_map_keyval_ctx {
+   u64 pad;// First 8 bytes are not accessible by bpf code
+   u32 type;   // offset:8;size:4; signed:0;
+   u32 key_len;// offset:12;   size:4; signed:0;
+   u32 key;// offset:16;   size:4; signed:0;
+   bool key_trunc; // offset:20;   size:1; signed:0;
+   u32 val_len;// offset:24;   size:4; signed:0;
+   u32 val;// offset:28;   size:4; signed:0;
+   bool val_trunc; // offset:32;   size:1; signed:0;
+   int ufd;// offset:36;   size:4; signed:1;
+   u32 id; // offset:40;   size:4; signed:0;
+};
+
+SEC("tracepoint/bpf/bpf_map_lookup_elem")
+int trace_bpf_map_lookup_elem(struct bpf_map_keyval_ctx *ctx)
+{
+   struct map_event_data data;
+   int cpu = bpf_get_smp_processor_id();
+   bool *filter;
+   u32 key = 0, map_id = ctx->id;
+
+   filter = bpf_map_lookup_elem(_events, );
+   if (!filter)
+   return 1;
+
+   if (!*filter)
+   goto send_event;
+
+   /*
+* If the map_id is not in the list of filtered
+* ids we immediately return
+*/
+   if (!bpf_map_lookup_elem(_ids, _id))
+   return 0;
+
+send_event:
+   data.map_id = map_id;
+   data.evnt_type = MAP_LOOKUP;
+   data.map_type = ctx->type;
+
+   bpf_perf_event_output(ctx, _event_trace, cpu, , sizeof(data));
+   return 0;
+}


looks like the purpose of the series is to create map notify mechanism
so some external user space daemon can snoop all bpf map operations
that all other processes and bpf programs are doing.
I think it would be way better to create a proper mechanism for that
with permissions.

tracepoints in the bpf core were introduced as introspection mechanism
for debugging. Right now we have better ways to do introspection
with ids, queries, etc that bpftool is using, so original purpose of
those tracepoints is gone and they actually rot.
Let's not repurpose them into this map notify logic.
I don't want tracepoints in the bpf core to become a stable interface
it will stiffen the development.


Well, I need it exactly for introspection and debugging, and just need
the missing ID (as it was introduced later).

Can we just drop the sample program then? I would likely not use the
sample program, because it is missing the PID+comm-name.


I agree with Jesper. We could drop this sample program but, do you think 
it is worth spending some time to create another sample program that 
shows the correct or suggested way to do it?


I believe it would be useful to have a program for this since I found it 
difficult to understand how to do it with the current 
documentation/samples and it seems I was not the only one hitting this 
problem.




For my use, I can simply use perf record to debug what programs are
changing the map ID:

   perf record -e bpf:bpf_map_* -a --filter 'id == 2'

This should be a fairly common troubleshooting step.  I want to
figure-out if anybody else (another userspace program) is changing my
map. This can easily be caused by two userspace control programs
running at the same time. Sysadm/scripts made a mistake and started two
instances. Without the map ID, I cannot know what map perf is talking
about...



That's in fact the real use case for the first two patches. Since bpf 
tracepoints are still a rather common (and easy to use) troubleshooting 
and monitoring tool why shouldn't we "enhance" their support with the 
newly added map/prog IDs?


[bpf-next PATCH 3/3] bpf: add sample program to trace map events

2018-04-18 Thread Sebastiano Miano
This patch adds a sample program, called trace_map_events,
that shows how to capture map events and filter them based on
the map id.

The program accepts a list of map IDs, via the -i command line
option, and filters all the map events related to those IDs (i.e.,
map_create/update/lookup/next_key).
If no IDs are specified, all map events are listed and no filtering
is performed.

Sample usage:

 # trace_map_events -i  -i  -i  ...

Signed-off-by: Sebastiano Miano <sebastiano.mi...@polito.it>
---
 samples/bpf/Makefile|4 
 samples/bpf/trace_map_events_kern.c |  225 +
 samples/bpf/trace_map_events_user.c |  314 +++
 3 files changed, 543 insertions(+)
 create mode 100644 samples/bpf/trace_map_events_kern.c
 create mode 100644 samples/bpf/trace_map_events_user.c

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 4d6a6ed..a7d52b6 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -15,6 +15,7 @@ hostprogs-y += tracex6
 hostprogs-y += tracex7
 hostprogs-y += test_probe_write_user
 hostprogs-y += trace_output
+hostprogs-y += trace_map_events
 hostprogs-y += lathist
 hostprogs-y += offwaketime
 hostprogs-y += spintest
@@ -65,6 +66,7 @@ tracex7-objs := bpf_load.o $(LIBBPF) tracex7_user.o
 load_sock_ops-objs := bpf_load.o $(LIBBPF) load_sock_ops.o
 test_probe_write_user-objs := bpf_load.o $(LIBBPF) test_probe_write_user_user.o
 trace_output-objs := bpf_load.o $(LIBBPF) trace_output_user.o
+trace_map_events-objs := bpf_load.o $(LIBBPF) trace_map_events_user.o
 lathist-objs := bpf_load.o $(LIBBPF) lathist_user.o
 offwaketime-objs := bpf_load.o $(LIBBPF) offwaketime_user.o
 spintest-objs := bpf_load.o $(LIBBPF) spintest_user.o
@@ -111,6 +113,7 @@ always += tracex7_kern.o
 always += sock_flags_kern.o
 always += test_probe_write_user_kern.o
 always += trace_output_kern.o
+always += trace_map_events_kern.o
 always += tcbpf1_kern.o
 always += tcbpf2_kern.o
 always += tc_l2_redirect_kern.o
@@ -171,6 +174,7 @@ HOSTLOADLIBES_test_cgrp2_sock2 += -lelf
 HOSTLOADLIBES_load_sock_ops += -lelf
 HOSTLOADLIBES_test_probe_write_user += -lelf
 HOSTLOADLIBES_trace_output += -lelf -lrt
+HOSTLOADLIBES_trace_map_events += -lelf -lrt
 HOSTLOADLIBES_lathist += -lelf
 HOSTLOADLIBES_offwaketime += -lelf
 HOSTLOADLIBES_spintest += -lelf
diff --git a/samples/bpf/trace_map_events_kern.c 
b/samples/bpf/trace_map_events_kern.c
new file mode 100644
index 000..f887b5b
--- /dev/null
+++ b/samples/bpf/trace_map_events_kern.c
@@ -0,0 +1,225 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (C) 2018 Politecnico di Torino, Italy
+ *
+ * Author: Sebastiano Miano <sebastiano.mi...@polito.it>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ */
+
+#include 
+#include 
+#include "bpf_helpers.h"
+
+enum map_event_type {
+   MAP_CREATE = 0,
+   MAP_UPDATE = 1,
+   MAP_LOOKUP = 2,
+   MAP_NEXT_KEY = 3
+};
+
+struct map_event_data {
+   u32 map_id;
+   enum map_event_type evnt_type;
+   u32 map_type;
+};
+
+struct bpf_map_def SEC("maps") map_event_trace = {
+   .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+   .key_size = sizeof(int),
+   .value_size = sizeof(u32),
+   .max_entries = 64,
+};
+
+struct bpf_map_def SEC("maps") filtered_ids = {
+   .type = BPF_MAP_TYPE_HASH,
+   .key_size = sizeof(u32),
+   .value_size = sizeof(u32),
+   .max_entries = 64,
+};
+
+struct bpf_map_def SEC("maps") filter_events = {
+   .type = BPF_MAP_TYPE_ARRAY,
+   .key_size = sizeof(u32),
+   .value_size = sizeof(bool),
+   .max_entries = 1,
+};
+
+/*
+ * Tracepoint format: 
/sys/kernel/debug/tracing/events/bpf/bpf_map_create/format
+ * Code in:kernel/include/trace/events/bpf.h
+ */
+struct bpf_map_create_ctx {
+   u64 pad;// First 8 bytes are not accessible by bpf code
+   u32 type;   // offset:8;size:4; signed:0;
+   u32 size_key;   // offset:12;   size:4; signed:0;
+   u32 size_value; // offset:16;   size:4; signed:0;
+   u32 max_entries;// offset:20;   size:4; signed:0;
+   u32 flags;  // offset:24;   size:4; signed:0;
+   int ufd;// offset:28;   size:4; signed:1;
+   u32 id; // offset:32;   size:4; signed:0;
+};
+
+SEC("tracepoint/bpf/bpf_map_create")
+int trace_bpf_map_create(struct bpf_map_create_ctx *ctx)
+{
+   struct map_event_data data;
+   int cpu = bpf_get_smp_processor_id();
+   bool *filter;
+   u32 key = 0, map_id = ctx->id;
+
+   filter = bpf_map_lookup_elem(_events, );
+   if (!filter)
+   return 1;
+
+   if (!*filter)
+   goto send_event;
+
+   /*
+   

[bpf-next PATCH 2/3] bpf: add id to prog tracepoint

2018-04-18 Thread Sebastiano Miano
This patch adds the prog id to the bpf tracepoints
that can be used when monitoring or inspecting prog
related functions.

Signed-off-by: Sebastiano Miano <sebastiano.mi...@polito.it>
Suggested-by: Jesper Dangaard Brouer <bro...@redhat.com>
---
 include/trace/events/bpf.h |   15 ---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/include/trace/events/bpf.h b/include/trace/events/bpf.h
index d7c9726..4ec19c5 100644
--- a/include/trace/events/bpf.h
+++ b/include/trace/events/bpf.h
@@ -65,16 +65,19 @@ DECLARE_EVENT_CLASS(bpf_prog_event,
TP_STRUCT__entry(
__array(u8, prog_tag, 8)
__field(u32, type)
+   __field(u32, id)
),
 
TP_fast_assign(
BUILD_BUG_ON(sizeof(__entry->prog_tag) != sizeof(prg->tag));
memcpy(__entry->prog_tag, prg->tag, sizeof(prg->tag));
__entry->type = prg->type;
+   __entry->id   = prg->aux->id;
),
 
-   TP_printk("prog=%s type=%s",
+   TP_printk("prog=%s id=%u type=%s",
  __print_hex_str(__entry->prog_tag, 8),
+ __entry->id,
  __print_symbolic(__entry->type, __PROG_TYPE_SYM_TAB))
 );
 
@@ -102,6 +105,7 @@ TRACE_EVENT(bpf_prog_load,
__array(u8, prog_tag, 8)
__field(u32, type)
__field(int, ufd)
+   __field(u32, id)
),
 
TP_fast_assign(
@@ -109,10 +113,12 @@ TRACE_EVENT(bpf_prog_load,
memcpy(__entry->prog_tag, prg->tag, sizeof(prg->tag));
__entry->type = prg->type;
__entry->ufd  = ufd;
+   __entry->id   = prg->aux->id;
),
 
-   TP_printk("prog=%s type=%s ufd=%d",
+   TP_printk("prog=%s id=%u type=%s ufd=%d",
  __print_hex_str(__entry->prog_tag, 8),
+ __entry->id,
  __print_symbolic(__entry->type, __PROG_TYPE_SYM_TAB),
  __entry->ufd)
 );
@@ -161,6 +167,7 @@ DECLARE_EVENT_CLASS(bpf_obj_prog,
__array(u8, prog_tag, 8)
__field(int, ufd)
__string(path, pname->name)
+   __field(u32, id)
),
 
TP_fast_assign(
@@ -168,10 +175,12 @@ DECLARE_EVENT_CLASS(bpf_obj_prog,
memcpy(__entry->prog_tag, prg->tag, sizeof(prg->tag));
__assign_str(path, pname->name);
__entry->ufd = ufd;
+   __entry->id  = prg->aux->id;
),
 
-   TP_printk("prog=%s path=%s ufd=%d",
+   TP_printk("prog=%s id=%u path=%s ufd=%d",
  __print_hex_str(__entry->prog_tag, 8),
+ __entry->id,
  __get_str(path), __entry->ufd)
 );
 



[bpf-next PATCH 0/3] Add ID to bpf_map/prog tracepoints

2018-04-18 Thread Sebastiano Miano
The following series:
1) Add ID to both map and prog related tracepoints
2) Add a sample program that shows how to monitor and 
   filter map related events using their IDs.

---

Sebastiano Miano (3):
  bpf: add id to map tracepoint
  bpf: add id to prog tracepoint
  bpf: add sample program to trace map events


 include/trace/events/bpf.h  |   44 -
 samples/bpf/Makefile|4 
 samples/bpf/trace_map_events_kern.c |  217 
 samples/bpf/trace_map_events_user.c |  314 +++
 4 files changed, 569 insertions(+), 10 deletions(-)
 create mode 100644 samples/bpf/trace_map_events_kern.c
 create mode 100644 samples/bpf/trace_map_events_user.c

--


[bpf-next PATCH 1/3] bpf: add id to map tracepoint

2018-04-18 Thread Sebastiano Miano
This patch adds the map id to the bpf tracepoints
that can be used when monitoring or inspecting map
related functions.

Signed-off-by: Sebastiano Miano <sebastiano.mi...@polito.it>
Suggested-by: Jesper Dangaard Brouer <bro...@redhat.com>
---
 include/trace/events/bpf.h |   29 ++---
 1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/include/trace/events/bpf.h b/include/trace/events/bpf.h
index 1501856..d7c9726 100644
--- a/include/trace/events/bpf.h
+++ b/include/trace/events/bpf.h
@@ -130,6 +130,7 @@ TRACE_EVENT(bpf_map_create,
__field(u32, max_entries)
__field(u32, flags)
__field(int, ufd)
+   __field(u32, id)
),
 
TP_fast_assign(
@@ -139,9 +140,11 @@ TRACE_EVENT(bpf_map_create,
__entry->max_entries = map->max_entries;
__entry->flags   = map->map_flags;
__entry->ufd = ufd;
+   __entry->id  = map->id;
),
 
-   TP_printk("map type=%s ufd=%d key=%u val=%u max=%u flags=%x",
+   TP_printk("id=%u type=%s ufd=%d key=%u val=%u max=%u flags=%x",
+ __entry->id,
  __print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB),
  __entry->ufd, __entry->size_key, __entry->size_value,
  __entry->max_entries, __entry->flags)
@@ -199,17 +202,20 @@ DECLARE_EVENT_CLASS(bpf_obj_map,
__field(u32, type)
__field(int, ufd)
__string(path, pname->name)
+   __field(u32, id)
),
 
TP_fast_assign(
__assign_str(path, pname->name);
__entry->type = map->map_type;
__entry->ufd  = ufd;
+   __entry->id   = map->id;
),
 
-   TP_printk("map type=%s ufd=%d path=%s",
- __print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB),
- __entry->ufd, __get_str(path))
+   TP_printk("map id=%u type=%s ufd=%d path=%s",
+   __entry->id,
+   __print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB),
+   __entry->ufd, __get_str(path))
 );
 
 DEFINE_EVENT(bpf_obj_map, bpf_obj_pin_map,
@@ -244,6 +250,7 @@ DECLARE_EVENT_CLASS(bpf_map_keyval,
__dynamic_array(u8, val, map->value_size)
__field(bool, val_trunc)
__field(int, ufd)
+   __field(u32, id)
),
 
TP_fast_assign(
@@ -255,9 +262,11 @@ DECLARE_EVENT_CLASS(bpf_map_keyval,
__entry->val_len   = min(map->value_size, 16U);
__entry->val_trunc = map->value_size != __entry->val_len;
__entry->ufd   = ufd;
+   __entry->id= map->id;
),
 
-   TP_printk("map type=%s ufd=%d key=[%s%s] val=[%s%s]",
+   TP_printk("map id=%d type=%s ufd=%d key=[%s%s] val=[%s%s]",
+ __entry->id,
  __print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB),
  __entry->ufd,
  __print_hex(__get_dynamic_array(key), __entry->key_len),
@@ -295,6 +304,7 @@ TRACE_EVENT(bpf_map_delete_elem,
__dynamic_array(u8, key, map->key_size)
__field(bool, key_trunc)
__field(int, ufd)
+   __field(u32, id)
),
 
TP_fast_assign(
@@ -303,9 +313,11 @@ TRACE_EVENT(bpf_map_delete_elem,
__entry->key_len   = min(map->key_size, 16U);
__entry->key_trunc = map->key_size != __entry->key_len;
__entry->ufd   = ufd;
+   __entry->id= map->id;
),
 
-   TP_printk("map type=%s ufd=%d key=[%s%s]",
+   TP_printk("map id=%d type=%s ufd=%d key=[%s%s]",
+ __entry->id,
  __print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB),
  __entry->ufd,
  __print_hex(__get_dynamic_array(key), __entry->key_len),
@@ -327,6 +339,7 @@ TRACE_EVENT(bpf_map_next_key,
__field(bool, key_trunc)
__field(bool, key_null)
__field(int, ufd)
+   __field(u32, id)
),
 
TP_fast_assign(
@@ -338,9 +351,11 @@ TRACE_EVENT(bpf_map_next_key,
__entry->key_len   = min(map->key_size, 16U);
__entry->key_trunc = map->key_size != __entry->key_len;
__entry->ufd   = ufd;
+   __entry->id= map->id;
),
 
-   TP_printk("map type=%s ufd=%d key=[%s%s] next=[%s%s]",
+   TP_printk("map id=%d type=%s ufd=%d key=[%s%s] next=[%s%s]",
+ __entry->id,
  __print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB),
  __entry->ufd,
  __entry->key_null ? "NULL" : 
__print_hex(__get_dynamic_array(key),