When I was running my testcase, which may block hundreds of threads
on fs locks, I got a lockup due to the output from debug_show_all_locks()
added by commit b2d4c2edb2e4f89a ("locking/hung_task: Show all locks").

For example, if 1000 threads were blocked in TASK_UNINTERRUPTIBLE state
and 500 of those 1000 threads held some lock, debug_show_all_locks() called
from the for_each_process_thread() loop would report the locks held by those
500 threads 1000 times. This is too much noise.

In order to make sure rcu_lock_break() is called frequently, we should
avoid calling debug_show_all_locks() from the for_each_process_thread() loop,
because debug_show_all_locks() effectively runs a for_each_process_thread()
loop itself. Let's defer calling debug_show_all_locks() until just before
panic() or until after leaving the for_each_process_thread() loop.

Signed-off-by: Tetsuo Handa <penguin-ker...@i-love.sakura.ne.jp>
Cc: Vegard Nossum <vegard.nos...@oracle.com>
---
 kernel/hung_task.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index f0f8e2a..751593e 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -43,6 +43,7 @@
 int __read_mostly sysctl_hung_task_warnings = 10;
 
 static int __read_mostly did_panic;
+static bool hung_task_show_lock;
 
 static struct task_struct *watchdog_task;
 
@@ -120,12 +121,14 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
                pr_err("\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
                        " disables this message.\n");
                sched_show_task(t);
-               debug_show_all_locks();
+               hung_task_show_lock = true;
        }
 
        touch_nmi_watchdog();
 
        if (sysctl_hung_task_panic) {
+               if (hung_task_show_lock)
+                       debug_show_all_locks();
                trigger_all_cpu_backtrace();
                panic("hung_task: blocked tasks");
        }
@@ -172,6 +175,7 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
        if (test_taint(TAINT_DIE) || did_panic)
                return;
 
+       hung_task_show_lock = false;
        rcu_read_lock();
        for_each_process_thread(g, t) {
                if (!max_count--)
@@ -187,6 +191,8 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
        }
  unlock:
        rcu_read_unlock();
+       if (hung_task_show_lock)
+               debug_show_all_locks();
 }
 
 static long hung_timeout_jiffies(unsigned long last_checked,
-- 
1.8.3.1

Reply via email to