Currently there is one global "latency_lock" covering updates to both
the global and the per-task latency records. Split it into one global
lock and one per-task lock to improve lock granularity and reduce
contention.
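
For illustration only (this snippet is not part of the diff below, it
just paraphrases the __account_scheduler_latency() hunk), the resulting
locking pattern is: the global table stays under the existing
"latency_lock", while the task's own records are updated under the new
task-local lock:

	/* Global table: keep the existing global lock. */
	raw_spin_lock_irqsave(&latency_lock, flags);
	account_global_scheduler_latency(tsk, &lat);
	raw_spin_unlock_irqrestore(&latency_lock, flags);

	/*
	 * Per-task records: use the task's own lock, so two tasks
	 * updating their own records no longer contend with each other.
	 */
	raw_spin_lock_irqsave(&tsk->latency_lock, flags);
	/* ... update tsk->latency_record[] ... */
	raw_spin_unlock_irqrestore(&tsk->latency_lock, flags);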

Cc: Arjan van de Ven <[email protected]>
Signed-off-by: Feng Tang <[email protected]>
---
 include/linux/sched.h | 1 +
 init/init_task.c      | 3 +++
 kernel/fork.c         | 4 ++++
 kernel/latencytop.c   | 9 +++++----
 4 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index f9b43c9..84cf13c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1095,6 +1095,7 @@ struct task_struct {
        unsigned long                   dirty_paused_when;
 
 #ifdef CONFIG_LATENCYTOP
+       raw_spinlock_t                  latency_lock;
        int                             latency_record_count;
        struct latency_record           latency_record[LT_SAVECOUNT];
 #endif
diff --git a/init/init_task.c b/init/init_task.c
index 5aebe3b..f7cc0fb 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -158,6 +158,9 @@ struct task_struct init_task
        .numa_group     = NULL,
        .numa_faults    = NULL,
 #endif
+#ifdef CONFIG_LATENCYTOP
+       .latency_lock   = __RAW_SPIN_LOCK_UNLOCKED(init_task.latency_lock),
+#endif
 #ifdef CONFIG_KASAN
        .kasan_depth    = 1,
 #endif
diff --git a/kernel/fork.c b/kernel/fork.c
index b69248e..2109468 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1963,6 +1963,10 @@ static __latent_entropy struct task_struct *copy_process(
 #ifdef TIF_SYSCALL_EMU
        clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);
 #endif
+
+#ifdef CONFIG_LATENCYTOP
+       raw_spin_lock_init(&p->latency_lock);
+#endif
        clear_all_latency_tracing(p);
 
        /* ok, now we should be set up.. */
diff --git a/kernel/latencytop.c b/kernel/latencytop.c
index 96b4179..6d7a174 100644
--- a/kernel/latencytop.c
+++ b/kernel/latencytop.c
@@ -74,10 +74,10 @@ void clear_all_latency_tracing(struct task_struct *p)
        if (!latencytop_enabled)
                return;
 
-       raw_spin_lock_irqsave(&latency_lock, flags);
+       raw_spin_lock_irqsave(&p->latency_lock, flags);
        memset(&p->latency_record, 0, sizeof(p->latency_record));
        p->latency_record_count = 0;
-       raw_spin_unlock_irqrestore(&latency_lock, flags);
+       raw_spin_unlock_irqrestore(&p->latency_lock, flags);
 }
 
 static void clear_global_latency_tracing(void)
@@ -194,9 +194,10 @@ __account_scheduler_latency(struct task_struct *tsk, int usecs, int inter)
        store_stacktrace(tsk, &lat);
 
        raw_spin_lock_irqsave(&latency_lock, flags);
-
        account_global_scheduler_latency(tsk, &lat);
+       raw_spin_unlock_irqrestore(&latency_lock, flags);
 
+       raw_spin_lock_irqsave(&tsk->latency_lock, flags);
        for (i = 0; i < tsk->latency_record_count; i++) {
                struct latency_record *mylat;
                int same = 1;
@@ -234,7 +235,7 @@ __account_scheduler_latency(struct task_struct *tsk, int usecs, int inter)
        memcpy(&tsk->latency_record[i], &lat, sizeof(struct latency_record));
 
 out_unlock:
-       raw_spin_unlock_irqrestore(&latency_lock, flags);
+       raw_spin_unlock_irqrestore(&tsk->latency_lock, flags);
 }
 
 static int lstats_show(struct seq_file *m, void *v)
-- 
2.7.4
