Re: [PATCH net-next 2/6] bpf: introduce BPF_PROG_TYPE_PERF_EVENT program type

2016-08-29 Thread Daniel Borkmann

On 08/27/2016 04:31 AM, Alexei Starovoitov wrote:

Introduce BPF_PROG_TYPE_PERF_EVENT programs that can be attached to
HW and SW perf events (PERF_TYPE_HARDWARE and PERF_TYPE_SOFTWARE
correspondingly in uapi/linux/perf_event.h)

The program visible context meta structure is
struct bpf_perf_event_data {
 struct pt_regs regs;
  __u64 sample_period;
};
which is accessible directly from the program:
int bpf_prog(struct bpf_perf_event_data *ctx)
{
   ... ctx->sample_period ...
   ... ctx->regs.ip ...
}

The bpf verifier rewrites the accesses into kernel internal
struct bpf_perf_event_data_kern which allows changing
struct perf_sample_data without affecting bpf programs.
New fields can be added to the end of struct bpf_perf_event_data
in the future.

Signed-off-by: Alexei Starovoitov 


Two things I noticed below, otherwise for BPF bits:

Acked-by: Daniel Borkmann 

[...]


+static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type 
type,
+   enum bpf_reg_type *reg_type)
+{
+   if (off < 0 || off >= sizeof(struct bpf_perf_event_data))
+   return false;
+   if (type != BPF_READ)
+   return false;
+   if (off % size != 0)
+   return false;
+   if (off == offsetof(struct bpf_perf_event_data, sample_period) &&
+   size != sizeof(u64))
+   return false;
+   if (size != sizeof(long))
+   return false;


Wouldn't this one rather need to be:

if (off == offsetof(struct bpf_perf_event_data, sample_period) {
if (size != sizeof(u64))
return false;
} else {
if (size != sizeof(long))
return false;
}

Otherwise on 32bit accessing sample_period might fail?


+   return true;
+}
+
+static u32 pe_prog_convert_ctx_access(enum bpf_access_type type, int dst_reg,
+ int src_reg, int ctx_off,
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog)
+{
+   struct bpf_insn *insn = insn_buf;
+
+   switch (ctx_off) {
+   case offsetof(struct bpf_perf_event_data, sample_period):


Would be good to add a test as we usually have done:

BUILD_BUG_ON(FIELD_SIZEOF(struct perf_sample_data, period) != 8);


+   *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct 
bpf_perf_event_data_kern, data)),
+ dst_reg, src_reg,
+ offsetof(struct bpf_perf_event_data_kern, 
data));
+   *insn++ = BPF_LDX_MEM(BPF_DW, dst_reg, dst_reg,
+ offsetof(struct perf_sample_data, 
period));
+   break;
+   default:
+   *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct 
bpf_perf_event_data_kern, regs)),
+ dst_reg, src_reg,
+ offsetof(struct bpf_perf_event_data_kern, 
regs));
+   *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(sizeof(long)),
+ dst_reg, dst_reg, ctx_off);
+   break;
+   }
+   return insn - insn_buf;
+}
+


[PATCH net-next 2/6] bpf: introduce BPF_PROG_TYPE_PERF_EVENT program type

2016-08-26 Thread Alexei Starovoitov
Introduce BPF_PROG_TYPE_PERF_EVENT programs that can be attached to
HW and SW perf events (PERF_TYPE_HARDWARE and PERF_TYPE_SOFTWARE
correspondingly in uapi/linux/perf_event.h)

The program visible context meta structure is
struct bpf_perf_event_data {
struct pt_regs regs;
 __u64 sample_period;
};
which is accessible directly from the program:
int bpf_prog(struct bpf_perf_event_data *ctx)
{
  ... ctx->sample_period ...
  ... ctx->regs.ip ...
}

The bpf verifier rewrites the accesses into kernel internal
struct bpf_perf_event_data_kern which allows changing
struct perf_sample_data without affecting bpf programs.
New fields can be added to the end of struct bpf_perf_event_data
in the future.

Signed-off-by: Alexei Starovoitov 
---
 include/linux/perf_event.h  |  5 
 include/uapi/linux/Kbuild   |  1 +
 include/uapi/linux/bpf.h|  1 +
 include/uapi/linux/bpf_perf_event.h | 18 
 kernel/trace/bpf_trace.c| 57 +
 5 files changed, 82 insertions(+)
 create mode 100644 include/uapi/linux/bpf_perf_event.h

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 2b6b43cc0dd5..97bfe62f30d7 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -788,6 +788,11 @@ struct perf_output_handle {
int page;
 };
 
+struct bpf_perf_event_data_kern {
+   struct pt_regs *regs;
+   struct perf_sample_data *data;
+};
+
 #ifdef CONFIG_CGROUP_PERF
 
 /*
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 185f8ea2702f..d0352a971ebd 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -71,6 +71,7 @@ header-y += binfmts.h
 header-y += blkpg.h
 header-y += blktrace_api.h
 header-y += bpf_common.h
+header-y += bpf_perf_event.h
 header-y += bpf.h
 header-y += bpqether.h
 header-y += bsg.h
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index e4c5a1baa993..f896dfac4ac0 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -95,6 +95,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_SCHED_ACT,
BPF_PROG_TYPE_TRACEPOINT,
BPF_PROG_TYPE_XDP,
+   BPF_PROG_TYPE_PERF_EVENT,
 };
 
 #define BPF_PSEUDO_MAP_FD  1
diff --git a/include/uapi/linux/bpf_perf_event.h 
b/include/uapi/linux/bpf_perf_event.h
new file mode 100644
index ..067427259820
--- /dev/null
+++ b/include/uapi/linux/bpf_perf_event.h
@@ -0,0 +1,18 @@
+/* Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#ifndef _UAPI__LINUX_BPF_PERF_EVENT_H__
+#define _UAPI__LINUX_BPF_PERF_EVENT_H__
+
+#include 
+#include 
+
+struct bpf_perf_event_data {
+   struct pt_regs regs;
+   __u64 sample_period;
+};
+
+#endif /* _UAPI__LINUX_BPF_PERF_EVENT_H__ */
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index ad35213b8405..2af0688bedfc 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1,4 +1,5 @@
 /* Copyright (c) 2011-2015 PLUMgrid, http://plumgrid.com
+ * Copyright (c) 2016 Facebook
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of version 2 of the GNU General Public
@@ -8,6 +9,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -552,10 +554,65 @@ static struct bpf_prog_type_list tracepoint_tl = {
.type   = BPF_PROG_TYPE_TRACEPOINT,
 };
 
+static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type 
type,
+   enum bpf_reg_type *reg_type)
+{
+   if (off < 0 || off >= sizeof(struct bpf_perf_event_data))
+   return false;
+   if (type != BPF_READ)
+   return false;
+   if (off % size != 0)
+   return false;
+   if (off == offsetof(struct bpf_perf_event_data, sample_period) &&
+   size != sizeof(u64))
+   return false;
+   if (size != sizeof(long))
+   return false;
+   return true;
+}
+
+static u32 pe_prog_convert_ctx_access(enum bpf_access_type type, int dst_reg,
+ int src_reg, int ctx_off,
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog)
+{
+   struct bpf_insn *insn = insn_buf;
+
+   switch (ctx_off) {
+   case offsetof(struct bpf_perf_event_data, sample_period):
+   *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct 
bpf_perf_event_data_kern, data)),
+ dst_reg, src_reg,
+ offsetof(struct bpf_perf_event_data_kern, 
data));
+   *insn++ = BPF_LDX_MEM(BPF_DW, dst_reg, dst_reg,
+ offsetof(struct