On Wed, 22 May 2019 02:30:14 +0200
Viktor Rosendahl <[email protected]> wrote:
> 
> I can try to add the static branch if you want it though.

Yes, it would need a static branch to prevent overhead.

> 
> best regards,
> 
> Viktor
> ---
>  include/linux/ftrace.h     |  13 +++++
>  kernel/sched/core.c        |   2 +
>  kernel/sched/idle.c        |   2 +
>  kernel/sched/sched.h       |   1 +
>  kernel/trace/trace.c       | 111 ++++++++++++++++++++++++++++++++++++-
>  kernel/trace/trace.h       |  12 ++++
>  kernel/trace/trace_hwlat.c |   4 +-
>  7 files changed, 142 insertions(+), 3 deletions(-)
> 
> diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
> index 25e2995d4a4c..88c76f23277c 100644
> --- a/include/linux/ftrace.h
> +++ b/include/linux/ftrace.h
> @@ -907,4 +907,17 @@ unsigned long arch_syscall_addr(int nr);
>  
>  #endif /* CONFIG_FTRACE_SYSCALLS */
>  
> +#if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
> +     defined(CONFIG_FSNOTIFY)
> +
> +void trace_disable_fsnotify(void);
> +void trace_enable_fsnotify(void);
> +
> +#else
> +
> +#define trace_disable_fsnotify() do { } while (0)
> +#define trace_enable_fsnotify() do { } while (0)
> +
> +#endif
> +
>  #endif /* _LINUX_FTRACE_H */
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 874c427742a9..440cd1a62722 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -3374,6 +3374,7 @@ static void __sched notrace __schedule(bool preempt)
>       struct rq *rq;
>       int cpu;
>  
> +     trace_disable_fsnotify();

Also, don't use "trace_*" names, I'm trying to reserve them for tracepoints 
only.

        latency_fsnotify_disable();

perhaps?

>       cpu = smp_processor_id();
>       rq = cpu_rq(cpu);
>       prev = rq->curr;
> @@ -3449,6 +3450,7 @@ static void __sched notrace __schedule(bool preempt)
>       }
>  
>       balance_callback(rq);
> +     trace_enable_fsnotify();

        latency_fsnotify_enable();

>  }
>  
>  void __noreturn do_task_dead(void)


> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index b52ed1ada0be..e22871d489a5 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -46,6 +46,7 @@
>  #include <linux/debugfs.h>
>  #include <linux/delayacct.h>
>  #include <linux/energy_model.h>
> +#include <linux/ftrace.h>
>  #include <linux/init_task.h>
>  #include <linux/kprobes.h>
>  #include <linux/kthread.h>
> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
> index 2c92b3d9ea30..5dcc1147cbcc 100644
> --- a/kernel/trace/trace.c
> +++ b/kernel/trace/trace.c
> @@ -44,6 +44,8 @@
>  #include <linux/trace.h>
>  #include <linux/sched/clock.h>
>  #include <linux/sched/rt.h>
> +#include <linux/fsnotify.h>
> +#include <linux/workqueue.h>
>  
>  #include "trace.h"
>  #include "trace_output.h"
> @@ -1481,6 +1483,111 @@ static ssize_t trace_seq_to_buffer(struct trace_seq 
> *s, void *buf, size_t cnt)
>  
>  unsigned long __read_mostly  tracing_thresh;
>  
> +#if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
> +     defined(CONFIG_FSNOTIFY)
> +
> +static const struct file_operations tracing_max_lat_fops;
> +static struct workqueue_struct *fsnotify_wq;
> +static DEFINE_PER_CPU(atomic_t, notify_disabled) = ATOMIC_INIT(0);

per cpu atomics is a bit of an overkill. Why the atomic if they are
only used per cpu?

Just make them per cpu, and use things like this_cpu_inc/dec();

> +static DEFINE_PER_CPU(struct llist_head, notify_list);
> +
> +static void trace_fsnotify_workfn(struct work_struct *work)
> +{
> +     struct trace_array *tr = container_of(work, struct trace_array,
> +                                           fsnotify_work);
> +     fsnotify(tr->d_max_latency->d_inode, FS_MODIFY,
> +              tr->d_max_latency->d_inode, FSNOTIFY_EVENT_INODE, NULL, 0);
> +}
> +
> +static void trace_create_maxlat_file(struct trace_array *tr,
> +                                   struct dentry *d_tracer)
> +{
> +     INIT_WORK(&tr->fsnotify_work, trace_fsnotify_workfn);
> +     atomic_set(&tr->notify_pending, 0);
> +     tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
> +                                           d_tracer, &tr->max_latency,
> +                                           &tracing_max_lat_fops);
> +}
> +
> +/*
> + * Disable fsnotify while in scheduler and idle code. Trying wake anything up
> + * from there, such as calling queue_work() is prone to deadlock.
> + */
> +void trace_disable_fsnotify(void)
> +{
> +     int cpu;
> +
> +     cpu = smp_processor_id();
> +     atomic_set(&per_cpu(notify_disabled, cpu), 1);
> +}

This should be a static inline function:

static inline void latency_fsnotify_disable(void)
{
        this_cpu_inc(latency_trace_notify_disable);
}

> +EXPORT_SYMBOL(trace_disable_fsnotify);
> +
> +void trace_enable_fsnotify(void)

Also this needs to be split as a static inline and a function call.

static inline void latency_fsnotify_enable(void)
{
        this_cpu_dec(latency_trace_notify_disable);
        if (static_key_false(&latency_notify_key))
                lantency_fsnotify_process();
}

Have the static_key enabled by the latency tracers that modify the file.

In this file:

void latency_fsontify_process(void)
{

> +{
> +     int cpu;
> +     struct trace_array *tr;
> +     struct llist_head *list;
> +     struct llist_node *node;
> +
> +     cpu = smp_processor_id();
> +     atomic_set(&per_cpu(notify_disabled, cpu), 0);
> +
> +     list = &per_cpu(notify_list, cpu);

        list = &this_cpu_read(notify_list);

-- Steve

> +     for (node = llist_del_first(list); node != NULL;
> +          node = llist_del_first(list)) {
> +             tr = llist_entry(node, struct trace_array, notify_ll);
> +             atomic_set(&tr->notify_pending, 0);
> +             queue_work(fsnotify_wq, &tr->fsnotify_work);
> +     }
> +}
> +EXPORT_SYMBOL(trace_enable_fsnotify);
> +
> +__init static int trace_maxlat_fsnotify_init(void)
> +{
> +     fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
> +                                   WQ_UNBOUND | WQ_HIGHPRI, 0);
> +     if (!fsnotify_wq) {
> +             pr_err("Unable to allocate tr_max_lat_wq\n");
> +             return -ENOMEM;
> +     }
> +     return 0;
> +}
> +
> +late_initcall_sync(trace_maxlat_fsnotify_init);
> +
> +void trace_maxlat_fsnotify(struct trace_array *tr)
> +{
> +     int cpu;
> +
> +     if (!fsnotify_wq)
> +             return;
> +
> +     cpu = smp_processor_id();
> +     if (!atomic_read(&per_cpu(notify_disabled, cpu)))
> +             queue_work(fsnotify_wq, &tr->fsnotify_work);
> +     else {
> +             /*
> +              * notify_pending prevents us from adding the same entry to
> +              * more than one notify_list. It will get queued in
> +              * trace_enable_fsnotify()
> +              */
> +             if (!atomic_xchg(&tr->notify_pending, 1))
> +                     llist_add(&tr->notify_ll, &per_cpu(notify_list, cpu));
> +     }
> +}
> +
> +/*
> + * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
> + *  defined(CONFIG_FSNOTIFY)
> + */
> +#else
> +
> +#define trace_create_maxlat_file(tr, d_tracer)                               
> \
> +     trace_create_file("tracing_max_latency", 0644, d_tracer,        \
> +                       &tr->max_latency, &tracing_max_lat_fops)
> +
> +#endif
> +
>  #ifdef CONFIG_TRACER_MAX_TRACE
>  /*
>   * Copy the new maximum trace into the separate maximum-trace
> @@ -1519,6 +1626,7 @@ __update_max_tr(struct trace_array *tr, struct 
> task_struct *tsk, int cpu)
>  
>       /* record this tasks comm */
>       tracing_record_cmdline(tsk);
> +     trace_maxlat_fsnotify(tr);
>  }
>  
>  /**
> @@ -8533,8 +8641,7 @@ init_tracer_tracefs(struct trace_array *tr, struct 
> dentry *d_tracer)
>       create_trace_options_dir(tr);
>  
>  #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
> -     trace_create_file("tracing_max_latency", 0644, d_tracer,
> -                     &tr->max_latency, &tracing_max_lat_fops);
> +     trace_create_maxlat_file(tr, d_tracer);
>  #endif
>  
>       if (ftrace_create_function_files(tr, d_tracer))
> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> index 1974ce818ddb..f95027813296 100644
> --- a/kernel/trace/trace.h
> +++ b/kernel/trace/trace.h
> @@ -17,6 +17,7 @@
>  #include <linux/compiler.h>
>  #include <linux/trace_seq.h>
>  #include <linux/glob.h>
> +#include <linux/workqueue.h>
>  
>  #ifdef CONFIG_FTRACE_SYSCALLS
>  #include <asm/unistd.h>              /* For NR_SYSCALLS           */
> @@ -265,6 +266,12 @@ struct trace_array {
>  #endif
>  #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
>       unsigned long           max_latency;
> +#ifdef CONFIG_FSNOTIFY
> +     struct dentry           *d_max_latency;
> +     struct work_struct      fsnotify_work;
> +     atomic_t                notify_pending;
> +     struct llist_node       notify_ll;
> +#endif
>  #endif
>       struct trace_pid_list   __rcu *filtered_pids;
>       /*
> @@ -786,6 +793,11 @@ void update_max_tr_single(struct trace_array *tr,
>                         struct task_struct *tsk, int cpu);
>  #endif /* CONFIG_TRACER_MAX_TRACE */
>  
> +#if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
> +     defined(CONFIG_FSNOTIFY)
> +void trace_maxlat_fsnotify(struct trace_array *tr);
> +#endif
> +
>  #ifdef CONFIG_STACKTRACE
>  void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
>                  int pc);
> diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c
> index 1e6db9cbe4dc..44a47f81d949 100644
> --- a/kernel/trace/trace_hwlat.c
> +++ b/kernel/trace/trace_hwlat.c
> @@ -254,8 +254,10 @@ static int get_sample(void)
>               trace_hwlat_sample(&s);
>  
>               /* Keep a running maximum ever recorded hardware latency */
> -             if (sample > tr->max_latency)
> +             if (sample > tr->max_latency) {
>                       tr->max_latency = sample;
> +                     trace_maxlat_fsnotify(tr);
> +             }
>       }
>  
>  out:

Reply via email to