From: Steven Rostedt <[email protected]>

Add options to trigger perf events for function tracing.

Two new options are added: func-cpu-cycles and func-cache-misses.

 # cd /sys/kernel/tracing
 # echo 1 > options/func-cache-misses
 # echo function > current_tracer
 # cat trace
[..]
    sshd-session-1014    [005] .....   327.836708: __x64_sys_read <-do_syscall_64
    sshd-session-1014    [005] .....   327.836708: cache_misses: 741719054
    sshd-session-1014    [005] .....   327.836712: ksys_read <-do_syscall_64
    sshd-session-1014    [005] .....   327.836713: cache_misses: 741720271
    sshd-session-1014    [005] .....   327.836716: fdget_pos <-ksys_read
    sshd-session-1014    [005] .....   327.836717: cache_misses: 741721483
    sshd-session-1014    [005] .....   327.836720: vfs_read <-ksys_read
    sshd-session-1014    [005] .....   327.836721: cache_misses: 741722726
    sshd-session-1014    [005] .....   327.836724: rw_verify_area <-vfs_read
    sshd-session-1014    [005] .....   327.836725: cache_misses: 741723940
    sshd-session-1014    [005] .....   327.836728: security_file_permission <-rw_verify_area
    sshd-session-1014    [005] .....   327.836729: cache_misses: 741725151

The option causes the perf event to be triggered after every traced
function call.
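
Note that the counter values in the output appear to be cumulative reads
rather than per-call deltas (an observation from the sample above, not
something stated by the patch): for example, the step from 741719054 to
741720271 works out to roughly 1217 cache misses between the
__x64_sys_read and ksys_read entries.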

If func-cpu-cycles is also enabled:

 # echo 1 > options/func-cpu-cycles
 # cat trace
[..]
    sshd-session-1014    [005] b..1.   536.844538: preempt_count_sub <-_raw_spin_unlock
    sshd-session-1014    [005] b..1.   536.844539: cpu_cycles: 1919425978 cache_misses: 3431216952
    sshd-session-1014    [005] b....   536.844545: validate_xmit_skb_list <-sch_direct_xmit
    sshd-session-1014    [005] b....   536.844545: cpu_cycles: 1919429935 cache_misses: 3431218535
    sshd-session-1014    [005] b....   536.844551: validate_xmit_skb.isra.0 <-validate_xmit_skb_list
    sshd-session-1014    [005] b....   536.844552: cpu_cycles: 1919433763 cache_misses: 3431220112
    sshd-session-1014    [005] b....   536.844557: netif_skb_features <-validate_xmit_skb.isra.0
    sshd-session-1014    [005] b....   536.844558: cpu_cycles: 1919437574 cache_misses: 3431221688
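
To detach the counters again, the options can be cleared and the tracer
reset (a usage sketch following the usual tracefs conventions; the exact
teardown ordering is an assumption, not taken from this patch):

 # echo 0 > options/func-cpu-cycles
 # echo 0 > options/func-cache-misses
 # echo nop > current_tracer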

Signed-off-by: Steven Rostedt (Google) <[email protected]>
---
 include/linux/trace_recursion.h |   5 +-
 kernel/trace/trace.c            |  58 ++++++++++++---
 kernel/trace/trace.h            |   6 ++
 kernel/trace/trace_functions.c  | 124 ++++++++++++++++++++++++++++++--
 4 files changed, 178 insertions(+), 15 deletions(-)

diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h
index ae04054a1be3..c42d86d81afa 100644
--- a/include/linux/trace_recursion.h
+++ b/include/linux/trace_recursion.h
@@ -132,9 +132,12 @@ static __always_inline int trace_test_and_set_recursion(unsigned long ip, unsign
                 * will think a recursion occurred, and the event will be dropped.
                 * Let a single instance happen via the TRANSITION_BIT to
                 * not drop those events.
+                *
+                * When ip is zero, the caller is purposely trying to cause
+                * recursion. Don't record it.
                 */
                bit = TRACE_CTX_TRANSITION + start;
-               if (val & (1 << bit)) {
+               if ((val & (1 << bit)) && ip) {
                        do_ftrace_record_recursion(ip, pip);
                        return -1;
                }
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 64d966a3ec8b..42bf1c046de1 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2915,21 +2915,18 @@ void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
 
 #ifdef CONFIG_PERF_EVENTS
-static inline void record_perf_event(struct trace_array *tr,
-                                    struct trace_buffer *buffer,
-                                    unsigned int trace_ctx)
+static inline void trace_perf_event(struct trace_array *tr,
+                                   struct trace_buffer *buffer,
+                                   int entries, u64 flags,
+                                   unsigned int trace_ctx)
 {
        struct ring_buffer_event *event;
        struct perf_event_entry *entry;
-       int entries = READ_ONCE(tr->perf_events);
        struct trace_array_cpu *data;
        u64 *value;
        int size;
        int cpu;
 
-       if (!entries)
-               return;
-
        guard(preempt_notrace)();
        cpu = smp_processor_id();
 
@@ -2949,12 +2946,12 @@ static inline void record_perf_event(struct trace_array *tr,
        entry                   = ring_buffer_event_data(event);
        value                   = entry->values;
 
-       if (tr->trace_flags & TRACE_ITER(PERF_CYCLES)) {
+       if (flags & TRACE_ITER(PERF_CYCLES)) {
                *value++ = TRACE_PERF_VALUE(PERF_TRACE_CYCLES);
                entries--;
        }
 
-       if (entries && tr->trace_flags & TRACE_ITER(PERF_CACHE)) {
+       if (entries && flags & TRACE_ITER(PERF_CACHE)) {
                *value++ = TRACE_PERF_VALUE(PERF_TRACE_CACHE);
                entries--;
        }
@@ -2968,6 +2965,49 @@ static inline void record_perf_event(struct trace_array *tr,
        local_dec(&data->disabled);
 }
 
+static inline void record_perf_event(struct trace_array *tr,
+                                    struct trace_buffer *buffer,
+                                    unsigned int trace_ctx)
+{
+       int entries = READ_ONCE(tr->perf_events);
+
+       if (!entries)
+               return;
+
+       trace_perf_event(tr, buffer, entries, tr->trace_flags, trace_ctx);
+}
+
+#ifdef CONFIG_FUNCTION_TRACER
+void ftrace_perf_events(struct trace_array *tr, int perf_events,
+                       u64 perf_mask, unsigned int trace_ctx)
+{
+       struct trace_buffer *buffer;
+       int bit;
+
+       /*
+        * Prevent any ftrace recursion.
+        * The ftrace_test_recursion_trylock() allows one nested loop
+        * to handle the case where an interrupt comes in and traces
+        * before the preempt_count is updated to the new context.
+        * This one instance allows that function to still be traced.
+        *
+        * The trace_perf_cache_misses() will call functions that function
+        * tracing will want to trace. Prevent this one loop from happening
+        * by taking the lock again. If an interrupt comes in now,
+        * it may still be dropped, but there's really nothing that can
+        * be done about that until all those locations get fixed.
+        */
+       bit = ftrace_test_recursion_trylock(0, 0);
+
+       buffer = tr->array_buffer.buffer;
+       trace_perf_event(tr, buffer, perf_events, perf_mask, trace_ctx);
+
+       /* bit < 0 means the trylock failed and does not need to be unlocked */
+       if (bit >= 0)
+               ftrace_test_recursion_unlock(bit);
+}
+#endif
+
 static int handle_perf_event(struct trace_array *tr, u64 mask, int enabled)
 {
        int ret = 0;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 094a156b0c70..bb764a2255c7 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -365,6 +365,8 @@ struct trace_array {
        int                     buffer_disabled;
 
        int                     perf_events;
+       int                     ftrace_perf_events;
+       u64                     ftrace_perf_mask;
 
        struct trace_pid_list   __rcu *filtered_pids;
        struct trace_pid_list   __rcu *filtered_no_pids;
@@ -1402,6 +1404,10 @@ extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
 u64 do_trace_perf_event(int type);
 int trace_perf_event_enable(int type);
 void trace_perf_event_disable(int type);
+#ifdef CONFIG_FUNCTION_TRACER
+void ftrace_perf_events(struct trace_array *tr, int perf_events,
+                       u64 perf_mask, unsigned int trace_ctx);
+#endif
 #else
 # define PERF_FLAGS
 static inline u64 do_trace_perf_event(int type) { return 0; }
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index c12795c2fb39..97f46ac7ef21 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -47,8 +47,12 @@ enum {
        TRACE_FUNC_OPT_NO_REPEATS       = 0x2,
        TRACE_FUNC_OPT_ARGS             = 0x4,
 
-       /* Update this to next highest bit. */
-       TRACE_FUNC_OPT_HIGHEST_BIT      = 0x8
+       /* Update this to next highest function bit. */
+       TRACE_FUNC_OPT_HIGHEST_BIT      = 0x8,
+
+       /* These are just other options */
+       TRACE_FUNC_OPT_PERF_CYCLES      = 0x10,
+       TRACE_FUNC_OPT_PERF_CACHE       = 0x20,
 };
 
 #define TRACE_FUNC_OPT_MASK    (TRACE_FUNC_OPT_HIGHEST_BIT - 1)
@@ -143,6 +147,105 @@ static bool handle_func_repeats(struct trace_array *tr, u32 flags_val)
        return true;
 }
 
+#ifdef CONFIG_PERF_EVENTS
+static inline void
+do_trace_function(struct trace_array *tr, unsigned long ip,
+                 unsigned long parent_ip, unsigned int trace_ctx,
+                 struct ftrace_regs *fregs)
+{
+       trace_function(tr, ip, parent_ip, trace_ctx, fregs);
+
+       if (likely(!tr->ftrace_perf_events))
+               return;
+
+       ftrace_perf_events(tr, tr->ftrace_perf_events, tr->ftrace_perf_mask, trace_ctx);
+}
+
+static bool handle_perf_event_flag(struct trace_array *tr, int bit, int set, int *err)
+{
+       u64 mask;
+       int type;
+
+       *err = 0;
+
+       switch (bit) {
+       case TRACE_FUNC_OPT_PERF_CYCLES:
+               mask = TRACE_ITER(PERF_CYCLES);
+               type = PERF_TRACE_CYCLES;
+               break;
+
+       case TRACE_FUNC_OPT_PERF_CACHE:
+               mask = TRACE_ITER(PERF_CACHE);
+               type = PERF_TRACE_CACHE;
+               break;
+
+       default:
+               return 0;
+       }
+
+       if (set)
+               *err = trace_perf_event_enable(type);
+       else
+               trace_perf_event_disable(type);
+
+       if (*err < 0)
+               return 1;
+
+       if (set) {
+               tr->ftrace_perf_events++;
+               tr->ftrace_perf_mask |= mask;
+       } else {
+               tr->ftrace_perf_mask &= ~mask;
+               tr->ftrace_perf_events--;
+       }
+       return 1;
+}
+
+static void ftrace_perf_enable(struct trace_array *tr, int bit)
+{
+       int err;
+
+       if (!(tr->current_trace_flags->val & bit))
+               return;
+
+       handle_perf_event_flag(tr, bit, 1, &err);
+       if (err < 0)
+               tr->current_trace_flags->val &= ~bit;
+}
+
+static void ftrace_perf_disable(struct trace_array *tr, int bit)
+{
+       int err;
+
+       /* Only disable if it was enabled */
+       if (!(tr->current_trace_flags->val & bit))
+               return;
+
+       handle_perf_event_flag(tr, bit, 0, &err);
+}
+
+static void ftrace_perf_init(struct trace_array *tr)
+{
+       ftrace_perf_enable(tr, TRACE_FUNC_OPT_PERF_CYCLES);
+       ftrace_perf_enable(tr, TRACE_FUNC_OPT_PERF_CACHE);
+}
+
+static void ftrace_perf_reset(struct trace_array *tr)
+{
+       ftrace_perf_disable(tr, TRACE_FUNC_OPT_PERF_CYCLES);
+       ftrace_perf_disable(tr, TRACE_FUNC_OPT_PERF_CACHE);
+}
+#else
+#define do_trace_function trace_function
+static inline bool handle_perf_event_flag(struct trace_array *tr, int bit,
+                                         int set, int *err)
+{
+       return 0;
+}
+static inline void ftrace_perf_init(struct trace_array *tr) { }
+static inline void ftrace_perf_reset(struct trace_array *tr) { }
+#endif /* CONFIG_PERF_EVENTS */
+
 static int function_trace_init(struct trace_array *tr)
 {
        ftrace_func_t func;
@@ -165,6 +268,8 @@ static int function_trace_init(struct trace_array *tr)
 
        tr->array_buffer.cpu = raw_smp_processor_id();
 
+       ftrace_perf_init(tr);
+
        tracing_start_cmdline_record();
        tracing_start_function_trace(tr);
        return 0;
@@ -172,6 +277,7 @@ static int function_trace_init(struct trace_array *tr)
 
 static void function_trace_reset(struct trace_array *tr)
 {
+       ftrace_perf_reset(tr);
        tracing_stop_function_trace(tr);
        tracing_stop_cmdline_record();
        ftrace_reset_array_ops(tr);
@@ -223,7 +329,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip,
 
        trace_ctx = tracing_gen_ctx_dec();
 
-       trace_function(tr, ip, parent_ip, trace_ctx, NULL);
+       do_trace_function(tr, ip, parent_ip, trace_ctx, NULL);
 
        ftrace_test_recursion_unlock(bit);
 }
@@ -245,7 +351,7 @@ function_args_trace_call(unsigned long ip, unsigned long parent_ip,
 
        trace_ctx = tracing_gen_ctx();
 
-       trace_function(tr, ip, parent_ip, trace_ctx, fregs);
+       do_trace_function(tr, ip, parent_ip, trace_ctx, fregs);
 
        ftrace_test_recursion_unlock(bit);
 }
@@ -372,7 +478,7 @@ function_no_repeats_trace_call(unsigned long ip, unsigned long parent_ip,
        trace_ctx = tracing_gen_ctx_dec();
        process_repeats(tr, ip, parent_ip, last_info, trace_ctx);
 
-       trace_function(tr, ip, parent_ip, trace_ctx, NULL);
+       do_trace_function(tr, ip, parent_ip, trace_ctx, NULL);
 
 out:
        ftrace_test_recursion_unlock(bit);
@@ -428,6 +534,10 @@ static struct tracer_opt func_opts[] = {
        { TRACER_OPT(func-no-repeats, TRACE_FUNC_OPT_NO_REPEATS) },
 #ifdef CONFIG_FUNCTION_TRACE_ARGS
        { TRACER_OPT(func-args, TRACE_FUNC_OPT_ARGS) },
+#endif
+#ifdef CONFIG_PERF_EVENTS
+       { TRACER_OPT(func-cpu-cycles, TRACE_FUNC_OPT_PERF_CYCLES) },
+       { TRACER_OPT(func-cache-misses, TRACE_FUNC_OPT_PERF_CACHE) },
 #endif
        { } /* Always set a last empty entry */
 };
@@ -457,6 +567,7 @@ func_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
 {
        ftrace_func_t func;
        u32 new_flags;
+       int err;
 
        /* Do nothing if already set. */
        if (!!set == !!(tr->current_trace_flags->val & bit))
@@ -466,6 +577,9 @@ func_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
        if (tr->current_trace != &function_trace)
                return 0;
 
+       if (handle_perf_event_flag(tr, bit, set, &err))
+               return err;
+
        new_flags = (tr->current_trace_flags->val & ~bit) | (set ? bit : 0);
        func = select_trace_function(new_flags);
        if (!func)
-- 
2.51.0


