Currently a single global "latency_lock" covers updates to both the global and the per-task latency records. Splitting it into one global lock plus a per-task lock improves the locking granularity and reduces contention.
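
With the split, the locking in __account_scheduler_latency() ends up looking roughly like the sketch below (simplified from the diff that follows): the global lock is held only while the global records are updated, and the task's own lock only while its latency_record[] is updated.

	raw_spin_lock_irqsave(&latency_lock, flags);		/* global records */
	account_global_scheduler_latency(tsk, &lat);
	raw_spin_unlock_irqrestore(&latency_lock, flags);

	raw_spin_lock_irqsave(&tsk->latency_lock, flags);	/* this task only */
	/* ... merge/insert &lat into tsk->latency_record[] ... */
	raw_spin_unlock_irqrestore(&tsk->latency_lock, flags);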
Cc: Arjan van de Ven <[email protected]>
Signed-off-by: Feng Tang <[email protected]>
---
 include/linux/sched.h | 1 +
 init/init_task.c      | 3 +++
 kernel/fork.c         | 4 ++++
 kernel/latencytop.c   | 9 +++++----
 4 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index f9b43c9..84cf13c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1095,6 +1095,7 @@ struct task_struct {
 	unsigned long			dirty_paused_when;
 
 #ifdef CONFIG_LATENCYTOP
+	raw_spinlock_t			latency_lock;
 	int				latency_record_count;
 	struct latency_record		latency_record[LT_SAVECOUNT];
 #endif
diff --git a/init/init_task.c b/init/init_task.c
index 5aebe3b..f7cc0fb 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -158,6 +158,9 @@ struct task_struct init_task
 	.numa_group	= NULL,
 	.numa_faults	= NULL,
 #endif
+#ifdef CONFIG_LATENCYTOP
+	.latency_lock	= __RAW_SPIN_LOCK_UNLOCKED(init_task.latency_lock),
+#endif
 #ifdef CONFIG_KASAN
 	.kasan_depth	= 1,
 #endif
diff --git a/kernel/fork.c b/kernel/fork.c
index b69248e..2109468 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1963,6 +1963,10 @@ static __latent_entropy struct task_struct *copy_process(
 #ifdef TIF_SYSCALL_EMU
 	clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);
 #endif
+
+#ifdef CONFIG_LATENCYTOP
+	raw_spin_lock_init(&p->latency_lock);
+#endif
 	clear_all_latency_tracing(p);
 
 	/* ok, now we should be set up.. */
diff --git a/kernel/latencytop.c b/kernel/latencytop.c
index 96b4179..6d7a174 100644
--- a/kernel/latencytop.c
+++ b/kernel/latencytop.c
@@ -74,10 +74,10 @@ void clear_all_latency_tracing(struct task_struct *p)
 	if (!latencytop_enabled)
 		return;
 
-	raw_spin_lock_irqsave(&latency_lock, flags);
+	raw_spin_lock_irqsave(&p->latency_lock, flags);
 	memset(&p->latency_record, 0, sizeof(p->latency_record));
 	p->latency_record_count = 0;
-	raw_spin_unlock_irqrestore(&latency_lock, flags);
+	raw_spin_unlock_irqrestore(&p->latency_lock, flags);
 }
 
 static void clear_global_latency_tracing(void)
@@ -194,9 +194,10 @@ __account_scheduler_latency(struct task_struct *tsk, int usecs, int inter)
 	store_stacktrace(tsk, &lat);
 
 	raw_spin_lock_irqsave(&latency_lock, flags);
-
 	account_global_scheduler_latency(tsk, &lat);
+	raw_spin_unlock_irqrestore(&latency_lock, flags);
 
+	raw_spin_lock_irqsave(&tsk->latency_lock, flags);
 	for (i = 0; i < tsk->latency_record_count; i++) {
 		struct latency_record *mylat;
 		int same = 1;
@@ -234,7 +235,7 @@ __account_scheduler_latency(struct task_struct *tsk, int usecs, int inter)
 	memcpy(&tsk->latency_record[i], &lat, sizeof(struct latency_record));
 
 out_unlock:
-	raw_spin_unlock_irqrestore(&latency_lock, flags);
+	raw_spin_unlock_irqrestore(&tsk->latency_lock, flags);
 }
 
 static int lstats_show(struct seq_file *m, void *v)
-- 
2.7.4

