This patch makes the tracing_max_latency file, e.g.
/sys/kernel/debug/tracing/tracing_max_latency, generate notifications
through the fsnotify framework when a new latency is available.

One particularly interesting use of this facility is when enabling
threshold tracing, through /sys/kernel/debug/tracing/tracing_thresh,
together with the preempt/irqsoff tracers. This makes it possible to
implement a user space program that can, with equal probability,
obtain traces of latencies that occur immediately after each other in
spite of the fact that the preempt/irqsoff tracers operate in overwrite
mode.

This facility works with the hwlat, preempt/irqsoff, and wakeup
tracers.

The tracers may call latency_fsnotify() from places such as
__schedule() or do_idle(); this makes it impossible to call
queue_work() directly without risking a deadlock. The same would
happen with a softirq, kernel thread or tasklet. For this reason we
use the irq_work mechanism to call queue_work().

Signed-off-by: Viktor Rosendahl <viktor.rosend...@gmail.com>
---
 kernel/trace/trace.c       | 75 +++++++++++++++++++++++++++++++++++++-
 kernel/trace/trace.h       | 18 +++++++++
 kernel/trace/trace_hwlat.c |  4 +-
 3 files changed, 94 insertions(+), 3 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 563e80f9006a..72ac20c4aaa1 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -44,6 +44,9 @@
 #include <linux/trace.h>
 #include <linux/sched/clock.h>
 #include <linux/sched/rt.h>
+#include <linux/fsnotify.h>
+#include <linux/irq_work.h>
+#include <linux/workqueue.h>
 
 #include "trace.h"
 #include "trace_output.h"
@@ -1480,6 +1483,74 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, 
void *buf, size_t cnt)
 
 unsigned long __read_mostly    tracing_thresh;
 
+#if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
+       defined(CONFIG_FSNOTIFY)
+
+static const struct file_operations tracing_max_lat_fops;
+static struct workqueue_struct *fsnotify_wq;
+
+static void latency_fsnotify_workfn(struct work_struct *work)
+{
+       struct trace_array *tr = container_of(work, struct trace_array,
+                                             fsnotify_work);
+       fsnotify(tr->d_max_latency->d_inode, FS_MODIFY,
+                tr->d_max_latency->d_inode, FSNOTIFY_EVENT_INODE, NULL, 0);
+}
+
+static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
+{
+       struct trace_array *tr = container_of(iwork, struct trace_array,
+                                             fsnotify_irqwork);
+       queue_work(fsnotify_wq, &tr->fsnotify_work);
+}
+
+static void trace_create_maxlat_file(struct trace_array *tr,
+                                    struct dentry *d_tracer)
+{
+       INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
+       init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
+       tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
+                                             d_tracer, &tr->max_latency,
+                                             &tracing_max_lat_fops);
+}
+
+__init static int latency_fsnotify_init(void)
+{
+       fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
+                                     WQ_UNBOUND | WQ_HIGHPRI, 0);
+       if (!fsnotify_wq) {
+               pr_err("Unable to allocate tr_max_lat_wq\n");
+               return -ENOMEM;
+       }
+       return 0;
+}
+
+late_initcall_sync(latency_fsnotify_init);
+
+void latency_fsnotify(struct trace_array *tr)
+{
+       if (!fsnotify_wq)
+               return;
+       /*
+        * queue_work() cannot be called directly here: we may be invoked from
+        * __schedule() or do_idle(), where that could deadlock. Defer the
+        * queue_work() call to irq_work context instead.
+        */
+       irq_work_queue(&tr->fsnotify_irqwork);
+}
+
+/*
+ * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
+ *  defined(CONFIG_FSNOTIFY)
+ */
+#else
+
+#define trace_create_maxlat_file(tr, d_tracer)                         \
+       trace_create_file("tracing_max_latency", 0644, d_tracer,        \
+                         &tr->max_latency, &tracing_max_lat_fops)
+
+#endif
+
 #ifdef CONFIG_TRACER_MAX_TRACE
 /*
  * Copy the new maximum trace into the separate maximum-trace
@@ -1518,6 +1589,7 @@ __update_max_tr(struct trace_array *tr, struct 
task_struct *tsk, int cpu)
 
        /* record this tasks comm */
        tracing_record_cmdline(tsk);
+       latency_fsnotify(tr);
 }
 
 /**
@@ -8550,8 +8622,7 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry 
*d_tracer)
        create_trace_options_dir(tr);
 
 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
-       trace_create_file("tracing_max_latency", 0644, d_tracer,
-                       &tr->max_latency, &tracing_max_lat_fops);
+       trace_create_maxlat_file(tr, d_tracer);
 #endif
 
        if (ftrace_create_function_files(tr, d_tracer))
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 005f08629b8b..4913ed1138aa 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -16,6 +16,8 @@
 #include <linux/trace_events.h>
 #include <linux/compiler.h>
 #include <linux/glob.h>
+#include <linux/irq_work.h>
+#include <linux/workqueue.h>
 
 #ifdef CONFIG_FTRACE_SYSCALLS
 #include <asm/unistd.h>                /* For NR_SYSCALLS           */
@@ -264,6 +266,11 @@ struct trace_array {
 #endif
 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
        unsigned long           max_latency;
+#ifdef CONFIG_FSNOTIFY
+       struct dentry           *d_max_latency;
+       struct work_struct      fsnotify_work;
+       struct irq_work         fsnotify_irqwork;
+#endif
 #endif
        struct trace_pid_list   __rcu *filtered_pids;
        /*
@@ -785,6 +792,17 @@ void update_max_tr_single(struct trace_array *tr,
                          struct task_struct *tsk, int cpu);
 #endif /* CONFIG_TRACER_MAX_TRACE */
 
+#if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
+       defined(CONFIG_FSNOTIFY)
+
+void latency_fsnotify(struct trace_array *tr);
+
+#else
+
+#define latency_fsnotify(tr)     do { } while (0)
+
+#endif
+
 #ifdef CONFIG_STACKTRACE
 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
                   int pc);
diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c
index fa95139445b2..9c379261ee89 100644
--- a/kernel/trace/trace_hwlat.c
+++ b/kernel/trace/trace_hwlat.c
@@ -254,8 +254,10 @@ static int get_sample(void)
                trace_hwlat_sample(&s);
 
                /* Keep a running maximum ever recorded hardware latency */
-               if (sample > tr->max_latency)
+               if (sample > tr->max_latency) {
                        tr->max_latency = sample;
+                       latency_fsnotify(tr);
+               }
        }
 
 out:
-- 
2.17.1

Reply via email to