[PATCH v4 tip 7/7] samples: bpf: IO latency analysis (iosnoop/heatmap)

2015-02-27 Thread Alexei Starovoitov
BPF C program attaches to blk_mq_start_request/blk_update_request kprobe events
to calculate IO latency.
For every completed block IO event it computes the time delta in nsec
and records in a histogram map: map[log10(delta)*10]++
User space reads this histogram map every 2 seconds and prints it as a 'heatmap'
using gray shades of text terminal. Black spaces have many events and white
spaces have very few events. Left most space is the smallest latency, right most
space is the largest latency in the range.

Usage:
$ sudo ./tracex3
and do 'sudo dd if=/dev/sda of=/dev/null' in other terminal.
Observe IO latencies and how different activity (like 'make kernel') affects it.

Similar experiments can be done for network transmit latencies, syscalls, etc

'-t' flag prints the heatmap using normal ascii characters:

$ sudo ./tracex3 -t
  heatmap of IO latency
  # - many events with this latency
- few events
|1us  |10us |100us|1ms  |10ms |100ms|1s   |10s
 *ooo. *O.#.# 221
  .  *# .   # 125
 ..   .o#*..# 55
.  . .  .  .#O  # 37
 .# # 175
   .#*. # 37
  # # 199
  .  . *#*. # 55
   *#..*# 42
  # # 266
  ...***Oo#*OO**o#* .   # 629
  # # 271
  . .#o* o.*o*  # 221
. . o* *#O..# 50

Signed-off-by: Alexei Starovoitov <a...@plumgrid.com>
---
 samples/bpf/Makefile   |4 ++
 samples/bpf/tracex3_kern.c |   89 ++
 samples/bpf/tracex3_user.c |  150 
 3 files changed, 243 insertions(+)
 create mode 100644 samples/bpf/tracex3_kern.c
 create mode 100644 samples/bpf/tracex3_user.c

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 6dd272143733..dcd850546d52 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -8,6 +8,7 @@ hostprogs-y += sockex1
 hostprogs-y += sockex2
 hostprogs-y += tracex1
 hostprogs-y += tracex2
+hostprogs-y += tracex3
 
 test_verifier-objs := test_verifier.o libbpf.o
 test_maps-objs := test_maps.o libbpf.o
@@ -16,6 +17,7 @@ sockex1-objs := bpf_load.o libbpf.o sockex1_user.o
 sockex2-objs := bpf_load.o libbpf.o sockex2_user.o
 tracex1-objs := bpf_load.o libbpf.o tracex1_user.o
 tracex2-objs := bpf_load.o libbpf.o tracex2_user.o
+tracex3-objs := bpf_load.o libbpf.o tracex3_user.o
 
 # Tell kbuild to always build the programs
 always := $(hostprogs-y)
@@ -23,6 +25,7 @@ always += sockex1_kern.o
 always += sockex2_kern.o
 always += tracex1_kern.o
 always += tracex2_kern.o
+always += tracex3_kern.o
 
 HOSTCFLAGS += -I$(objtree)/usr/include
 
@@ -31,6 +34,7 @@ HOSTLOADLIBES_sockex1 += -lelf
 HOSTLOADLIBES_sockex2 += -lelf
 HOSTLOADLIBES_tracex1 += -lelf
 HOSTLOADLIBES_tracex2 += -lelf
+HOSTLOADLIBES_tracex3 += -lelf
 
 # point this to your LLVM backend with bpf support
 LLC=$(srctree)/tools/bpf/llvm/bld/Debug+Asserts/bin/llc
diff --git a/samples/bpf/tracex3_kern.c b/samples/bpf/tracex3_kern.c
new file mode 100644
index 000000000000..64f1c0b01a30
--- /dev/null
+++ b/samples/bpf/tracex3_kern.c
@@ -0,0 +1,89 @@
+/* Copyright (c) 2013-2015 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/version.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+struct bpf_map_def SEC("maps") my_map = {
+   .type = BPF_MAP_TYPE_HASH,
+   .key_size = sizeof(long),
+   .value_size = sizeof(u64),
+   .max_entries = 4096,
+};
+
+/* kprobe is NOT a stable ABI
+ * This bpf+kprobe example can stop working any time.
+ */
+SEC("events/kprobes/blk_mq_start_request")
+int bpf_prog1(struct pt_regs *ctx)
+{
+   long rq = ctx->di;
+   u64 val = bpf_ktime_get_ns();
+
+   bpf_map_update_elem(&my_map, &rq, &val, BPF_ANY);
+   return 0;
+}
+
+static unsigned int log2l(unsigned long long n)
+{
+#define S(k) if (n >= (1ull << k)) { i += k; n >>= k; }
+   int i = -(n == 0);
+   S(32); S(16); S(8); S(4); S(2); S(1);
+   return i;
+#undef S
+}
+
+#define SLOTS 100
+
+struct bpf_map_def SEC("maps") lat_map = {
+   .type = BPF_MAP_TYPE_ARRAY,
+   .key_size = sizeof(u32),
+

[PATCH v4 tip 7/7] samples: bpf: IO latency analysis (iosnoop/heatmap)

2015-02-27 Thread Alexei Starovoitov
BPF C program attaches to blk_mq_start_request/blk_update_request kprobe events
to calculate IO latency.
For every completed block IO event it computes the time delta in nsec
and records in a histogram map: map[log10(delta)*10]++
User space reads this histogram map every 2 seconds and prints it as a 'heatmap'
using gray shades of text terminal. Black spaces have many events and white
spaces have very few events. Left most space is the smallest latency, right most
space is the largest latency in the range.

Usage:
$ sudo ./tracex3
and do 'sudo dd if=/dev/sda of=/dev/null' in other terminal.
Observe IO latencies and how different activity (like 'make kernel') affects it.

Similar experiments can be done for network transmit latencies, syscalls, etc

'-t' flag prints the heatmap using normal ascii characters:

$ sudo ./tracex3 -t
  heatmap of IO latency
  # - many events with this latency
- few events
|1us  |10us |100us|1ms  |10ms |100ms|1s   |10s
 *ooo. *O.#.# 221
  .  *# .   # 125
 ..   .o#*..# 55
.  . .  .  .#O  # 37
 .# # 175
   .#*. # 37
  # # 199
  .  . *#*. # 55
   *#..*# 42
  # # 266
  ...***Oo#*OO**o#* .   # 629
  # # 271
  . .#o* o.*o*  # 221
. . o* *#O..# 50

Signed-off-by: Alexei Starovoitov <a...@plumgrid.com>
---
 samples/bpf/Makefile   |4 ++
 samples/bpf/tracex3_kern.c |   89 ++
 samples/bpf/tracex3_user.c |  150 
 3 files changed, 243 insertions(+)
 create mode 100644 samples/bpf/tracex3_kern.c
 create mode 100644 samples/bpf/tracex3_user.c

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 6dd272143733..dcd850546d52 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -8,6 +8,7 @@ hostprogs-y += sockex1
 hostprogs-y += sockex2
 hostprogs-y += tracex1
 hostprogs-y += tracex2
+hostprogs-y += tracex3
 
 test_verifier-objs := test_verifier.o libbpf.o
 test_maps-objs := test_maps.o libbpf.o
@@ -16,6 +17,7 @@ sockex1-objs := bpf_load.o libbpf.o sockex1_user.o
 sockex2-objs := bpf_load.o libbpf.o sockex2_user.o
 tracex1-objs := bpf_load.o libbpf.o tracex1_user.o
 tracex2-objs := bpf_load.o libbpf.o tracex2_user.o
+tracex3-objs := bpf_load.o libbpf.o tracex3_user.o
 
 # Tell kbuild to always build the programs
 always := $(hostprogs-y)
@@ -23,6 +25,7 @@ always += sockex1_kern.o
 always += sockex2_kern.o
 always += tracex1_kern.o
 always += tracex2_kern.o
+always += tracex3_kern.o
 
 HOSTCFLAGS += -I$(objtree)/usr/include
 
@@ -31,6 +34,7 @@ HOSTLOADLIBES_sockex1 += -lelf
 HOSTLOADLIBES_sockex2 += -lelf
 HOSTLOADLIBES_tracex1 += -lelf
 HOSTLOADLIBES_tracex2 += -lelf
+HOSTLOADLIBES_tracex3 += -lelf
 
 # point this to your LLVM backend with bpf support
 LLC=$(srctree)/tools/bpf/llvm/bld/Debug+Asserts/bin/llc
diff --git a/samples/bpf/tracex3_kern.c b/samples/bpf/tracex3_kern.c
new file mode 100644
index 000000000000..64f1c0b01a30
--- /dev/null
+++ b/samples/bpf/tracex3_kern.c
@@ -0,0 +1,89 @@
+/* Copyright (c) 2013-2015 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/version.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+struct bpf_map_def SEC("maps") my_map = {
+   .type = BPF_MAP_TYPE_HASH,
+   .key_size = sizeof(long),
+   .value_size = sizeof(u64),
+   .max_entries = 4096,
+};
+
+/* kprobe is NOT a stable ABI
+ * This bpf+kprobe example can stop working any time.
+ */
+SEC("events/kprobes/blk_mq_start_request")
+int bpf_prog1(struct pt_regs *ctx)
+{
+   long rq = ctx->di;
+   u64 val = bpf_ktime_get_ns();
+
+   bpf_map_update_elem(&my_map, &rq, &val, BPF_ANY);
+   return 0;
+}
+
+static unsigned int log2l(unsigned long long n)
+{
+#define S(k) if (n >= (1ull << k)) { i += k; n >>= k; }
+   int i = -(n == 0);
+   S(32); S(16); S(8); S(4); S(2); S(1);
+   return i;
+#undef S
+}
+
+#define SLOTS 100
+
+struct bpf_map_def SEC("maps") lat_map = {
+