Re: [PATCH V4 1/1] bpf: control events stored in PERF_EVENT_ARRAY maps trace data output when perf sampling

2015-10-19 Thread xiakaixu
于 2015/10/20 10:14, Alexei Starovoitov 写道:
> On 10/19/15 3:37 AM, Kaixu Xia wrote:
>> +/* flags for PERF_EVENT_ARRAY maps*/
>> +enum {
>> +BPF_EVENT_CTL_BIT_CUR = 0,
>> +BPF_EVENT_CTL_BIT_ALL = 1,
>> +__NR_BPF_EVENT_CTL_BITS,
>> +};
>> +
>> +#define BPF_CTL_BIT_FLAG_MASK \
>> +((1ULL << __NR_BPF_EVENT_CTL_BITS) - 1)
>> +#define BPF_CTL_BIT_DUMP_CUR \
>> +(1ULL << BPF_EVENT_CTL_BIT_CUR)
>> +#define BPF_CTL_BIT_DUMP_ALL \
>> +(1ULL << BPF_EVENT_CTL_BIT_ALL)
>> +
> 
> the above shouldn't be part of uapi header. It can stay in bpf_trace.c
> Just document these bits next to helper similar to skb_store_bytes()
> 
> The rest looks ok.
> It still needs an ack from Peter for perf_event bits

Thanks for your comments!
This part will be moved to bpf_trace.c in next version.
> 
> 
> .
> 


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH V4 1/1] bpf: control events stored in PERF_EVENT_ARRAY maps trace data output when perf sampling

2015-10-19 Thread Alexei Starovoitov

On 10/19/15 3:37 AM, Kaixu Xia wrote:

+/* flags for PERF_EVENT_ARRAY maps*/
+enum {
+   BPF_EVENT_CTL_BIT_CUR = 0,
+   BPF_EVENT_CTL_BIT_ALL = 1,
+   __NR_BPF_EVENT_CTL_BITS,
+};
+
+#define BPF_CTL_BIT_FLAG_MASK \
+   ((1ULL << __NR_BPF_EVENT_CTL_BITS) - 1)
+#define BPF_CTL_BIT_DUMP_CUR \
+   (1ULL << BPF_EVENT_CTL_BIT_CUR)
+#define BPF_CTL_BIT_DUMP_ALL \
+   (1ULL << BPF_EVENT_CTL_BIT_ALL)
+


the above shouldn't be part of uapi header. It can stay in bpf_trace.c
Just document these bits next to helper similar to skb_store_bytes()

The rest looks ok.
It still needs an ack from Peter for perf_event bits

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH V4 1/1] bpf: control events stored in PERF_EVENT_ARRAY maps trace data output when perf sampling

2015-10-19 Thread Kaixu Xia
This patch adds the flag soft_enable to control the trace data
output process during perf sampling. By setting this flag and
integrating with eBPF, we can control the data output process and
get only the samples we are most interested in.

The bpf helper bpf_perf_event_control() can control either the perf
event on the current cpu or all the perf events stored in the map,
depending on the third parameter 'flag'.

Signed-off-by: Kaixu Xia 
---
 include/linux/perf_event.h  |  1 +
 include/uapi/linux/bpf.h| 19 +++
 include/uapi/linux/perf_event.h |  3 ++-
 kernel/bpf/verifier.c   |  3 ++-
 kernel/events/core.c| 13 +++
 kernel/trace/bpf_trace.c| 51 +
 6 files changed, 88 insertions(+), 2 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 092a0e8..bb3bf87 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -472,6 +472,7 @@ struct perf_event {
struct irq_work pending;
 
atomic_t event_limit;
+   atomic_t soft_enable;
 
void (*destroy)(struct perf_event *);
struct rcu_head rcu_head;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 564f1f0..a2b0d9d 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -132,6 +132,20 @@ enum bpf_prog_type {
 #define BPF_NOEXIST 1 /* create new element if it didn't exist */
 #define BPF_EXIST  2 /* update existing element */
 
+/* flags for PERF_EVENT_ARRAY maps*/
+enum {
+   BPF_EVENT_CTL_BIT_CUR = 0,
+   BPF_EVENT_CTL_BIT_ALL = 1,
+   __NR_BPF_EVENT_CTL_BITS,
+};
+
+#define BPF_CTL_BIT_FLAG_MASK \
+   ((1ULL << __NR_BPF_EVENT_CTL_BITS) - 1)
+#define BPF_CTL_BIT_DUMP_CUR \
+   (1ULL << BPF_EVENT_CTL_BIT_CUR)
+#define BPF_CTL_BIT_DUMP_ALL \
+   (1ULL << BPF_EVENT_CTL_BIT_ALL)
+
 union bpf_attr {
struct { /* anonymous struct used by BPF_MAP_CREATE command */
__u32   map_type;   /* one of enum bpf_map_type */
@@ -287,6 +301,11 @@ enum bpf_func_id {
 * Return: realm if != 0
 */
BPF_FUNC_get_route_realm,
+
+   /**
+* u64 bpf_perf_event_control(&map, index, flag)
+*/
+   BPF_FUNC_perf_event_control,
__BPF_FUNC_MAX_ID,
 };
 
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 2881145..a791b03 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -331,7 +331,8 @@ struct perf_event_attr {
comm_exec  :  1, /* flag comm events that are due to an exec */
use_clockid:  1, /* use @clockid for time fields */
context_switch :  1, /* context switch data */
-   __reserved_1   : 37;
+   soft_disable   :  1, /* output data on samples by default */
+   __reserved_1   : 36;
 
union {
__u32   wakeup_events;/* wakeup every n events */
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 1d6b97b..ffec14b 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -245,6 +245,7 @@ static const struct {
 } func_limit[] = {
{BPF_MAP_TYPE_PROG_ARRAY, BPF_FUNC_tail_call},
{BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_read},
+   {BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_control},
 };
 
 static void print_verifier_state(struct verifier_env *env)
@@ -910,7 +911,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
 * don't allow any other map type to be passed into
 * the special func;
 */
-   if (bool_map != bool_func)
+   if (bool_func && bool_map != bool_func)
return -EINVAL;
}
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index b11756f..5219635 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6337,6 +6337,9 @@ static int __perf_event_overflow(struct perf_event *event,
irq_work_queue(&event->pending);
}
 
+   if (unlikely(!atomic_read(&event->soft_enable)))
+   return 0;
+
if (event->overflow_handler)
event->overflow_handler(event, data, regs);
else
@@ -7709,6 +7712,14 @@ static void account_event(struct perf_event *event)
account_event_cpu(event, event->cpu);
 }
 
+static void perf_event_check_dump_flag(struct perf_event *event)
+{
+   if (event->attr.soft_disable == 1)
+   atomic_set(&event->soft_enable, 0);
+   else
+   atomic_set(&event->soft_enable, 1);
+}
+
 /*
  * Allocate and initialize a event structure
  */
@@ -7840,6 +7851,8 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,

[PATCH V4 1/1] bpf: control events stored in PERF_EVENT_ARRAY maps trace data output when perf sampling

2015-10-19 Thread Kaixu Xia
This patch adds the flag soft_enable to control the trace data
output process during perf sampling. By setting this flag and
integrating with eBPF, we can control the data output process and
get only the samples we are most interested in.

The bpf helper bpf_perf_event_control() can control either the perf
event on the current cpu or all the perf events stored in the map,
depending on the third parameter 'flag'.

Signed-off-by: Kaixu Xia 
---
 include/linux/perf_event.h  |  1 +
 include/uapi/linux/bpf.h| 19 +++
 include/uapi/linux/perf_event.h |  3 ++-
 kernel/bpf/verifier.c   |  3 ++-
 kernel/events/core.c| 13 +++
 kernel/trace/bpf_trace.c| 51 +
 6 files changed, 88 insertions(+), 2 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 092a0e8..bb3bf87 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -472,6 +472,7 @@ struct perf_event {
struct irq_work pending;
 
atomic_t event_limit;
+   atomic_t soft_enable;
 
void (*destroy)(struct perf_event *);
struct rcu_head rcu_head;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 564f1f0..a2b0d9d 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -132,6 +132,20 @@ enum bpf_prog_type {
 #define BPF_NOEXIST 1 /* create new element if it didn't exist */
 #define BPF_EXIST  2 /* update existing element */
 
+/* flags for PERF_EVENT_ARRAY maps*/
+enum {
+   BPF_EVENT_CTL_BIT_CUR = 0,
+   BPF_EVENT_CTL_BIT_ALL = 1,
+   __NR_BPF_EVENT_CTL_BITS,
+};
+
+#define BPF_CTL_BIT_FLAG_MASK \
+   ((1ULL << __NR_BPF_EVENT_CTL_BITS) - 1)
+#define BPF_CTL_BIT_DUMP_CUR \
+   (1ULL << BPF_EVENT_CTL_BIT_CUR)
+#define BPF_CTL_BIT_DUMP_ALL \
+   (1ULL << BPF_EVENT_CTL_BIT_ALL)
+
 union bpf_attr {
struct { /* anonymous struct used by BPF_MAP_CREATE command */
__u32   map_type;   /* one of enum bpf_map_type */
@@ -287,6 +301,11 @@ enum bpf_func_id {
 * Return: realm if != 0
 */
BPF_FUNC_get_route_realm,
+
+   /**
+* u64 bpf_perf_event_control(&map, index, flag)
+*/
+   BPF_FUNC_perf_event_control,
__BPF_FUNC_MAX_ID,
 };
 
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 2881145..a791b03 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -331,7 +331,8 @@ struct perf_event_attr {
comm_exec  :  1, /* flag comm events that are due to an exec */
use_clockid:  1, /* use @clockid for time fields */
context_switch :  1, /* context switch data */
-   __reserved_1   : 37;
+   soft_disable   :  1, /* output data on samples by default */
+   __reserved_1   : 36;
 
union {
__u32   wakeup_events;/* wakeup every n events */
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 1d6b97b..ffec14b 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -245,6 +245,7 @@ static const struct {
 } func_limit[] = {
{BPF_MAP_TYPE_PROG_ARRAY, BPF_FUNC_tail_call},
{BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_read},
+   {BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_control},
 };
 
 static void print_verifier_state(struct verifier_env *env)
@@ -910,7 +911,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
 * don't allow any other map type to be passed into
 * the special func;
 */
-   if (bool_map != bool_func)
+   if (bool_func && bool_map != bool_func)
return -EINVAL;
}
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index b11756f..5219635 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6337,6 +6337,9 @@ static int __perf_event_overflow(struct perf_event *event,
irq_work_queue(&event->pending);
}
 
+   if (unlikely(!atomic_read(&event->soft_enable)))
+   return 0;
+
if (event->overflow_handler)
event->overflow_handler(event, data, regs);
else
@@ -7709,6 +7712,14 @@ static void account_event(struct perf_event *event)
account_event_cpu(event, event->cpu);
 }
 
+static void perf_event_check_dump_flag(struct perf_event *event)
+{
+   if (event->attr.soft_disable == 1)
+   atomic_set(&event->soft_enable, 0);
+   else
+   atomic_set(&event->soft_enable, 1);
+}
+
 /*
  * Allocate and initialize a event structure
  */
@@ -7840,6 +7851,8 @@ perf_event_alloc(struct perf_event_attr *attr, 

Re: [PATCH V4 1/1] bpf: control events stored in PERF_EVENT_ARRAY maps trace data output when perf sampling

2015-10-19 Thread xiakaixu
于 2015/10/20 10:14, Alexei Starovoitov 写道:
> On 10/19/15 3:37 AM, Kaixu Xia wrote:
>> +/* flags for PERF_EVENT_ARRAY maps*/
>> +enum {
>> +BPF_EVENT_CTL_BIT_CUR = 0,
>> +BPF_EVENT_CTL_BIT_ALL = 1,
>> +__NR_BPF_EVENT_CTL_BITS,
>> +};
>> +
>> +#define BPF_CTL_BIT_FLAG_MASK \
>> +((1ULL << __NR_BPF_EVENT_CTL_BITS) - 1)
>> +#define BPF_CTL_BIT_DUMP_CUR \
>> +(1ULL << BPF_EVENT_CTL_BIT_CUR)
>> +#define BPF_CTL_BIT_DUMP_ALL \
>> +(1ULL << BPF_EVENT_CTL_BIT_ALL)
>> +
> 
> the above shouldn't be part of uapi header. It can stay in bpf_trace.c
> Just document these bits next to helper similar to skb_store_bytes()
> 
> The rest looks ok.
> It still needs an ack from Peter for perf_event bits

Thanks for your comments!
This part will be moved to bpf_trace.c in next version.
> 
> 
> .
> 


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH V4 1/1] bpf: control events stored in PERF_EVENT_ARRAY maps trace data output when perf sampling

2015-10-19 Thread Alexei Starovoitov

On 10/19/15 3:37 AM, Kaixu Xia wrote:

+/* flags for PERF_EVENT_ARRAY maps*/
+enum {
+   BPF_EVENT_CTL_BIT_CUR = 0,
+   BPF_EVENT_CTL_BIT_ALL = 1,
+   __NR_BPF_EVENT_CTL_BITS,
+};
+
+#define BPF_CTL_BIT_FLAG_MASK \
+   ((1ULL << __NR_BPF_EVENT_CTL_BITS) - 1)
+#define BPF_CTL_BIT_DUMP_CUR \
+   (1ULL << BPF_EVENT_CTL_BIT_CUR)
+#define BPF_CTL_BIT_DUMP_ALL \
+   (1ULL << BPF_EVENT_CTL_BIT_ALL)
+


the above shouldn't be part of uapi header. It can stay in bpf_trace.c
Just document these bits next to helper similar to skb_store_bytes()

The rest looks ok.
It still needs an ack from Peter for perf_event bits

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/