Re: [PATCH] locking/hung_task: Defer showing held locks

2017-03-12 Thread Vegard Nossum

On 12/03/2017 06:33, Tetsuo Handa wrote:

When I was running my testcase which may block hundreds of threads
on fs locks, I got lockup due to output from debug_show_all_locks()
added by commit b2d4c2edb2e4f89a ("locking/hung_task: Show all locks").

For example, if 1000 threads were blocked in TASK_UNINTERRUPTIBLE state
and 500 out of 1000 threads hold some lock, debug_show_all_locks() from
for_each_process_thread() loop will report locks held by 500 threads for
1000 times. This is a too much noise.

In order to make sure rcu_lock_break() is called frequently, we should
avoid calling debug_show_all_locks() from for_each_process_thread() loop
because debug_show_all_locks() effectively calls for_each_process_thread()
loop. Let's defer calling debug_show_all_locks() till before panic() or
leaving for_each_process_thread() loop.

Signed-off-by: Tetsuo Handa 
Cc: Vegard Nossum 
---
 kernel/hung_task.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index f0f8e2a..751593e 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -43,6 +43,7 @@
 int __read_mostly sysctl_hung_task_warnings = 10;

 static int __read_mostly did_panic;
+static bool hung_task_show_lock;

 static struct task_struct *watchdog_task;

@@ -120,12 +121,14 @@ static void check_hung_task(struct task_struct *t, 
unsigned long timeout)
pr_err("\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
" disables this message.\n");
sched_show_task(t);
-   debug_show_all_locks();
+   hung_task_show_lock = true;
}

touch_nmi_watchdog();

if (sysctl_hung_task_panic) {
+   if (hung_task_show_lock)
+   debug_show_all_locks();
trigger_all_cpu_backtrace();
panic("hung_task: blocked tasks");
}
@@ -172,6 +175,7 @@ static void check_hung_uninterruptible_tasks(unsigned long 
timeout)
if (test_taint(TAINT_DIE) || did_panic)
return;

+   hung_task_show_lock = false;
rcu_read_lock();
for_each_process_thread(g, t) {
if (!max_count--)
@@ -187,6 +191,8 @@ static void check_hung_uninterruptible_tasks(unsigned long 
timeout)
}
  unlock:
rcu_read_unlock();
+   if (hung_task_show_lock)
+   debug_show_all_locks();
 }

 static long hung_timeout_jiffies(unsigned long last_checked,



Reviewed/Acked-by: Vegard Nossum 

Thank you for fixing this.


Vegard


Re: [PATCH] locking/hung_task: Defer showing held locks

2017-03-12 Thread Vegard Nossum

On 12/03/2017 06:33, Tetsuo Handa wrote:

When I was running my testcase which may block hundreds of threads
on fs locks, I got lockup due to output from debug_show_all_locks()
added by commit b2d4c2edb2e4f89a ("locking/hung_task: Show all locks").

For example, if 1000 threads were blocked in TASK_UNINTERRUPTIBLE state
and 500 out of 1000 threads hold some lock, debug_show_all_locks() from
for_each_process_thread() loop will report locks held by 500 threads for
1000 times. This is a too much noise.

In order to make sure rcu_lock_break() is called frequently, we should
avoid calling debug_show_all_locks() from for_each_process_thread() loop
because debug_show_all_locks() effectively calls for_each_process_thread()
loop. Let's defer calling debug_show_all_locks() till before panic() or
leaving for_each_process_thread() loop.

Signed-off-by: Tetsuo Handa 
Cc: Vegard Nossum 
---
 kernel/hung_task.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index f0f8e2a..751593e 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -43,6 +43,7 @@
 int __read_mostly sysctl_hung_task_warnings = 10;

 static int __read_mostly did_panic;
+static bool hung_task_show_lock;

 static struct task_struct *watchdog_task;

@@ -120,12 +121,14 @@ static void check_hung_task(struct task_struct *t, 
unsigned long timeout)
pr_err("\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
" disables this message.\n");
sched_show_task(t);
-   debug_show_all_locks();
+   hung_task_show_lock = true;
}

touch_nmi_watchdog();

if (sysctl_hung_task_panic) {
+   if (hung_task_show_lock)
+   debug_show_all_locks();
trigger_all_cpu_backtrace();
panic("hung_task: blocked tasks");
}
@@ -172,6 +175,7 @@ static void check_hung_uninterruptible_tasks(unsigned long 
timeout)
if (test_taint(TAINT_DIE) || did_panic)
return;

+   hung_task_show_lock = false;
rcu_read_lock();
for_each_process_thread(g, t) {
if (!max_count--)
@@ -187,6 +191,8 @@ static void check_hung_uninterruptible_tasks(unsigned long 
timeout)
}
  unlock:
rcu_read_unlock();
+   if (hung_task_show_lock)
+   debug_show_all_locks();
 }

 static long hung_timeout_jiffies(unsigned long last_checked,



Reviewed/Acked-by: Vegard Nossum 

Thank you for fixing this.


Vegard


[PATCH] locking/hung_task: Defer showing held locks

2017-03-11 Thread Tetsuo Handa
When I was running my testcase which may block hundreds of threads
on fs locks, I got lockup due to output from debug_show_all_locks()
added by commit b2d4c2edb2e4f89a ("locking/hung_task: Show all locks").

For example, if 1000 threads were blocked in TASK_UNINTERRUPTIBLE state
and 500 out of 1000 threads hold some lock, debug_show_all_locks() from
for_each_process_thread() loop will report locks held by 500 threads for
1000 times. This is a too much noise.

In order to make sure rcu_lock_break() is called frequently, we should
avoid calling debug_show_all_locks() from for_each_process_thread() loop
because debug_show_all_locks() effectively calls for_each_process_thread()
loop. Let's defer calling debug_show_all_locks() till before panic() or
leaving for_each_process_thread() loop.

Signed-off-by: Tetsuo Handa 
Cc: Vegard Nossum 
---
 kernel/hung_task.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index f0f8e2a..751593e 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -43,6 +43,7 @@
 int __read_mostly sysctl_hung_task_warnings = 10;
 
 static int __read_mostly did_panic;
+static bool hung_task_show_lock;
 
 static struct task_struct *watchdog_task;
 
@@ -120,12 +121,14 @@ static void check_hung_task(struct task_struct *t, 
unsigned long timeout)
pr_err("\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
" disables this message.\n");
sched_show_task(t);
-   debug_show_all_locks();
+   hung_task_show_lock = true;
}
 
touch_nmi_watchdog();
 
if (sysctl_hung_task_panic) {
+   if (hung_task_show_lock)
+   debug_show_all_locks();
trigger_all_cpu_backtrace();
panic("hung_task: blocked tasks");
}
@@ -172,6 +175,7 @@ static void check_hung_uninterruptible_tasks(unsigned long 
timeout)
if (test_taint(TAINT_DIE) || did_panic)
return;
 
+   hung_task_show_lock = false;
rcu_read_lock();
for_each_process_thread(g, t) {
if (!max_count--)
@@ -187,6 +191,8 @@ static void check_hung_uninterruptible_tasks(unsigned long 
timeout)
}
  unlock:
rcu_read_unlock();
+   if (hung_task_show_lock)
+   debug_show_all_locks();
 }
 
 static long hung_timeout_jiffies(unsigned long last_checked,
-- 
1.8.3.1



[PATCH] locking/hung_task: Defer showing held locks

2017-03-11 Thread Tetsuo Handa
When I was running my testcase which may block hundreds of threads
on fs locks, I got lockup due to output from debug_show_all_locks()
added by commit b2d4c2edb2e4f89a ("locking/hung_task: Show all locks").

For example, if 1000 threads were blocked in TASK_UNINTERRUPTIBLE state
and 500 out of 1000 threads hold some lock, debug_show_all_locks() from
for_each_process_thread() loop will report locks held by 500 threads for
1000 times. This is a too much noise.

In order to make sure rcu_lock_break() is called frequently, we should
avoid calling debug_show_all_locks() from for_each_process_thread() loop
because debug_show_all_locks() effectively calls for_each_process_thread()
loop. Let's defer calling debug_show_all_locks() till before panic() or
leaving for_each_process_thread() loop.

Signed-off-by: Tetsuo Handa 
Cc: Vegard Nossum 
---
 kernel/hung_task.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index f0f8e2a..751593e 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -43,6 +43,7 @@
 int __read_mostly sysctl_hung_task_warnings = 10;
 
 static int __read_mostly did_panic;
+static bool hung_task_show_lock;
 
 static struct task_struct *watchdog_task;
 
@@ -120,12 +121,14 @@ static void check_hung_task(struct task_struct *t, 
unsigned long timeout)
pr_err("\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
" disables this message.\n");
sched_show_task(t);
-   debug_show_all_locks();
+   hung_task_show_lock = true;
}
 
touch_nmi_watchdog();
 
if (sysctl_hung_task_panic) {
+   if (hung_task_show_lock)
+   debug_show_all_locks();
trigger_all_cpu_backtrace();
panic("hung_task: blocked tasks");
}
@@ -172,6 +175,7 @@ static void check_hung_uninterruptible_tasks(unsigned long 
timeout)
if (test_taint(TAINT_DIE) || did_panic)
return;
 
+   hung_task_show_lock = false;
rcu_read_lock();
for_each_process_thread(g, t) {
if (!max_count--)
@@ -187,6 +191,8 @@ static void check_hung_uninterruptible_tasks(unsigned long 
timeout)
}
  unlock:
rcu_read_unlock();
+   if (hung_task_show_lock)
+   debug_show_all_locks();
 }
 
 static long hung_timeout_jiffies(unsigned long last_checked,
-- 
1.8.3.1



Re: [PATCH] locking/hung_task: Defer showing held locks

2017-02-28 Thread Tetsuo Handa
Vegard, can I send this patch?

Tetsuo Handa wrote:
> Vegard Nossum wrote:
> > On 13 December 2016 at 15:45, Tetsuo Handa
> >  wrote:
> > > When I was running my testcase which may block hundreds of threads
> > > on fs locks, I got lockup due to output from debug_show_all_locks()
> > > added by commit b2d4c2edb2e4f89a ("locking/hung_task: Show all locks").
> > >
> > > I think we don't need to call debug_show_all_locks() on each blocked
> > > thread. Let's defer calling debug_show_all_locks() till before panic()
> > > or leaving for_each_process_thread() loop.
> >
> > First of all, sorry for not answering earlier.
> 
> No problem.
> 
> >
> > I'm not sure I fully understand the problem, you say the "output from
> > debug_show_all_locks()" caused a lockup, but was the problem simply
> > that the amount of output caused it to stall for a long time?
> 
> In Linux 4.9, in order to tell administrator that something might be wrong
> with memory allocation, warn_alloc() which calls printk() periodically
> when memory allocation is stalling for too long was added. However, since
> printk() waits until all pending data is sent to console using cond_resched(),
> printk() continues waiting as long as somebody else calls printk() when
> cond_resched() is called. This is problematic under OOM situation.
> 
> Since the OOM killer calls printk() with oom_lock held, it happened that
> printk() called from the OOM killer is forever unable to return because
> warn_alloc() periodically calls printk() since the OOM killer is holding
> oom_lock.
> 
> And it happened that khungtaskd is another source which calls printk()
> periodically when threads are blocked on fs locks waiting for memory
> allocation. debug_show_all_locks() generates far more amount of output
> compared to warn_alloc() if debug_show_all_locks() is called on each
> thread blocked on fs locks waiting for memory allocation. Therefore,
> we should avoid calling debug_show_all_locks() on each blocked thread.
> 
> Full story starts at 
> http://lkml.kernel.org/r/1481020439-5867-1-git-send-email-penguin-ker...@i-love.sakura.ne.jp
>  but
> I appreciate if you can join on 
> http://lkml.kernel.org/r/1478416501-10104-1-git-send-email-penguin-ker...@i-love.sakura.ne.jp
>  .
> 
> >
> > Could we instead
> >
> > 1) move the debug_show_all_locks() into the if
> > (sysctl_hung_task_panic) bit unconditionally
> >
> > 2) call something (touch_nmi_watchdog()?) inside debug_show_all_locks()
> >
> > 3) in another way make debug_show_all_locks() more robust so it doesn't 
> > "lockup"
> >
> > ?
> 
> Yes, that might be an improvement. But not needed for this patch.
> 


Re: [PATCH] locking/hung_task: Defer showing held locks

2017-02-28 Thread Tetsuo Handa
Vegard, can I send this patch?

Tetsuo Handa wrote:
> Vegard Nossum wrote:
> > On 13 December 2016 at 15:45, Tetsuo Handa
> >  wrote:
> > > When I was running my testcase which may block hundreds of threads
> > > on fs locks, I got lockup due to output from debug_show_all_locks()
> > > added by commit b2d4c2edb2e4f89a ("locking/hung_task: Show all locks").
> > >
> > > I think we don't need to call debug_show_all_locks() on each blocked
> > > thread. Let's defer calling debug_show_all_locks() till before panic()
> > > or leaving for_each_process_thread() loop.
> >
> > First of all, sorry for not answering earlier.
> 
> No problem.
> 
> >
> > I'm not sure I fully understand the problem, you say the "output from
> > debug_show_all_locks()" caused a lockup, but was the problem simply
> > that the amount of output caused it to stall for a long time?
> 
> In Linux 4.9, in order to tell administrator that something might be wrong
> with memory allocation, warn_alloc() which calls printk() periodically
> when memory allocation is stalling for too long was added. However, since
> printk() waits until all pending data is sent to console using cond_resched(),
> printk() continues waiting as long as somebody else calls printk() when
> cond_resched() is called. This is problematic under OOM situation.
> 
> Since the OOM killer calls printk() with oom_lock held, it happened that
> printk() called from the OOM killer is forever unable to return because
> warn_alloc() periodically calls printk() since the OOM killer is holding
> oom_lock.
> 
> And it happened that khungtaskd is another source which calls printk()
> periodically when threads are blocked on fs locks waiting for memory
> allocation. debug_show_all_locks() generates far more amount of output
> compared to warn_alloc() if debug_show_all_locks() is called on each
> thread blocked on fs locks waiting for memory allocation. Therefore,
> we should avoid calling debug_show_all_locks() on each blocked thread.
> 
> Full story starts at 
> http://lkml.kernel.org/r/1481020439-5867-1-git-send-email-penguin-ker...@i-love.sakura.ne.jp
>  but
> I appreciate if you can join on 
> http://lkml.kernel.org/r/1478416501-10104-1-git-send-email-penguin-ker...@i-love.sakura.ne.jp
>  .
> 
> >
> > Could we instead
> >
> > 1) move the debug_show_all_locks() into the if
> > (sysctl_hung_task_panic) bit unconditionally
> >
> > 2) call something (touch_nmi_watchdog()?) inside debug_show_all_locks()
> >
> > 3) in another way make debug_show_all_locks() more robust so it doesn't 
> > "lockup"
> >
> > ?
> 
> Yes, that might be an improvement. But not needed for this patch.
> 


Re: [PATCH] locking/hung_task: Defer showing held locks

2016-12-20 Thread Tetsuo Handa
Vegard Nossum wrote:
> On 13 December 2016 at 15:45, Tetsuo Handa
>  wrote:
> > When I was running my testcase which may block hundreds of threads
> > on fs locks, I got lockup due to output from debug_show_all_locks()
> > added by commit b2d4c2edb2e4f89a ("locking/hung_task: Show all locks").
> >
> > I think we don't need to call debug_show_all_locks() on each blocked
> > thread. Let's defer calling debug_show_all_locks() till before panic()
> > or leaving for_each_process_thread() loop.
>
> First of all, sorry for not answering earlier.

No problem.

>
> I'm not sure I fully understand the problem, you say the "output from
> debug_show_all_locks()" caused a lockup, but was the problem simply
> that the amount of output caused it to stall for a long time?

In Linux 4.9, in order to tell administrator that something might be wrong
with memory allocation, warn_alloc() which calls printk() periodically
when memory allocation is stalling for too long was added. However, since
printk() waits until all pending data is sent to console using cond_resched(),
printk() continues waiting as long as somebody else calls printk() when
cond_resched() is called. This is problematic under OOM situation.

Since the OOM killer calls printk() with oom_lock held, it happened that
printk() called from the OOM killer is forever unable to return because
warn_alloc() periodically calls printk() since the OOM killer is holding
oom_lock.

And it happened that khungtaskd is another source which calls printk()
periodically when threads are blocked on fs locks waiting for memory
allocation. debug_show_all_locks() generates far more amount of output
compared to warn_alloc() if debug_show_all_locks() is called on each
thread blocked on fs locks waiting for memory allocation. Therefore,
we should avoid calling debug_show_all_locks() on each blocked thread.

Full story starts at 
http://lkml.kernel.org/r/1481020439-5867-1-git-send-email-penguin-ker...@i-love.sakura.ne.jp
 but
I appreciate if you can join on 
http://lkml.kernel.org/r/1478416501-10104-1-git-send-email-penguin-ker...@i-love.sakura.ne.jp
 .

>
> Could we instead
>
> 1) move the debug_show_all_locks() into the if
> (sysctl_hung_task_panic) bit unconditionally
>
> 2) call something (touch_nmi_watchdog()?) inside debug_show_all_locks()
>
> 3) in another way make debug_show_all_locks() more robust so it doesn't 
> "lockup"
>
> ?

Yes, that might be an improvement. But not needed for this patch.


Re: [PATCH] locking/hung_task: Defer showing held locks

2016-12-20 Thread Tetsuo Handa
Vegard Nossum wrote:
> On 13 December 2016 at 15:45, Tetsuo Handa
>  wrote:
> > When I was running my testcase which may block hundreds of threads
> > on fs locks, I got lockup due to output from debug_show_all_locks()
> > added by commit b2d4c2edb2e4f89a ("locking/hung_task: Show all locks").
> >
> > I think we don't need to call debug_show_all_locks() on each blocked
> > thread. Let's defer calling debug_show_all_locks() till before panic()
> > or leaving for_each_process_thread() loop.
>
> First of all, sorry for not answering earlier.

No problem.

>
> I'm not sure I fully understand the problem, you say the "output from
> debug_show_all_locks()" caused a lockup, but was the problem simply
> that the amount of output caused it to stall for a long time?

In Linux 4.9, in order to tell administrator that something might be wrong
with memory allocation, warn_alloc() which calls printk() periodically
when memory allocation is stalling for too long was added. However, since
printk() waits until all pending data is sent to console using cond_resched(),
printk() continues waiting as long as somebody else calls printk() when
cond_resched() is called. This is problematic under OOM situation.

Since the OOM killer calls printk() with oom_lock held, it happened that
printk() called from the OOM killer is forever unable to return because
warn_alloc() periodically calls printk() since the OOM killer is holding
oom_lock.

And it happened that khungtaskd is another source which calls printk()
periodically when threads are blocked on fs locks waiting for memory
allocation. debug_show_all_locks() generates far more amount of output
compared to warn_alloc() if debug_show_all_locks() is called on each
thread blocked on fs locks waiting for memory allocation. Therefore,
we should avoid calling debug_show_all_locks() on each blocked thread.

Full story starts at 
http://lkml.kernel.org/r/1481020439-5867-1-git-send-email-penguin-ker...@i-love.sakura.ne.jp
 but
I appreciate if you can join on 
http://lkml.kernel.org/r/1478416501-10104-1-git-send-email-penguin-ker...@i-love.sakura.ne.jp
 .

>
> Could we instead
>
> 1) move the debug_show_all_locks() into the if
> (sysctl_hung_task_panic) bit unconditionally
>
> 2) call something (touch_nmi_watchdog()?) inside debug_show_all_locks()
>
> 3) in another way make debug_show_all_locks() more robust so it doesn't 
> "lockup"
>
> ?

Yes, that might be an improvement. But not needed for this patch.


Re: [PATCH] locking/hung_task: Defer showing held locks

2016-12-20 Thread Vegard Nossum
On 13 December 2016 at 15:45, Tetsuo Handa
 wrote:
> When I was running my testcase which may block hundreds of threads
> on fs locks, I got lockup due to output from debug_show_all_locks()
> added by commit b2d4c2edb2e4f89a ("locking/hung_task: Show all locks").
>
> I think we don't need to call debug_show_all_locks() on each blocked
> thread. Let's defer calling debug_show_all_locks() till before panic()
> or leaving for_each_process_thread() loop.

First of all, sorry for not answering earlier.

I'm not sure I fully understand the problem, you say the "output from
debug_show_all_locks()" caused a lockup, but was the problem simply
that the amount of output caused it to stall for a long time?

Could we instead

1) move the debug_show_all_locks() into the if
(sysctl_hung_task_panic) bit unconditionally

2) call something (touch_nmi_watchdog()?) inside debug_show_all_locks()

3) in another way make debug_show_all_locks() more robust so it doesn't "lockup"

?


Vegard


Re: [PATCH] locking/hung_task: Defer showing held locks

2016-12-20 Thread Vegard Nossum
On 13 December 2016 at 15:45, Tetsuo Handa
 wrote:
> When I was running my testcase which may block hundreds of threads
> on fs locks, I got lockup due to output from debug_show_all_locks()
> added by commit b2d4c2edb2e4f89a ("locking/hung_task: Show all locks").
>
> I think we don't need to call debug_show_all_locks() on each blocked
> thread. Let's defer calling debug_show_all_locks() till before panic()
> or leaving for_each_process_thread() loop.

First of all, sorry for not answering earlier.

I'm not sure I fully understand the problem, you say the "output from
debug_show_all_locks()" caused a lockup, but was the problem simply
that the amount of output caused it to stall for a long time?

Could we instead

1) move the debug_show_all_locks() into the if
(sysctl_hung_task_panic) bit unconditionally

2) call something (touch_nmi_watchdog()?) inside debug_show_all_locks()

3) in another way make debug_show_all_locks() more robust so it doesn't "lockup"

?


Vegard


[PATCH] locking/hung_task: Defer showing held locks

2016-12-13 Thread Tetsuo Handa
When I was running my testcase which may block hundreds of threads
on fs locks, I got lockup due to output from debug_show_all_locks()
added by commit b2d4c2edb2e4f89a ("locking/hung_task: Show all locks").

I think we don't need to call debug_show_all_locks() on each blocked
thread. Let's defer calling debug_show_all_locks() till before panic()
or leaving for_each_process_thread() loop.

Signed-off-by: Tetsuo Handa 
Cc: Vegard Nossum 
Cc: Andrew Morton 
Cc: Linus Torvalds 
Cc: Mandeep Singh Baines 
Cc: Paul E. McKenney 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: Ingo Molnar 
---
 kernel/hung_task.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 2b59c82..a9ada5d 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -40,6 +40,7 @@
 int __read_mostly sysctl_hung_task_warnings = 10;
 
 static int __read_mostly did_panic;
+static bool hung_task_show_lock;
 
 static struct task_struct *watchdog_task;
 
@@ -116,12 +117,14 @@ static void check_hung_task(struct task_struct *t, 
unsigned long timeout)
pr_err("\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
" disables this message.\n");
sched_show_task(t);
-   debug_show_all_locks();
+   hung_task_show_lock = true;
}
 
touch_nmi_watchdog();
 
if (sysctl_hung_task_panic) {
+   if (hung_task_show_lock)
+   debug_show_all_locks();
trigger_all_cpu_backtrace();
panic("hung_task: blocked tasks");
}
@@ -168,6 +171,7 @@ static void check_hung_uninterruptible_tasks(unsigned long 
timeout)
if (test_taint(TAINT_DIE) || did_panic)
return;
 
+   hung_task_show_lock = false;
rcu_read_lock();
for_each_process_thread(g, t) {
if (!max_count--)
@@ -183,6 +187,8 @@ static void check_hung_uninterruptible_tasks(unsigned long 
timeout)
}
  unlock:
rcu_read_unlock();
+   if (hung_task_show_lock)
+   debug_show_all_locks();
 }
 
 static long hung_timeout_jiffies(unsigned long last_checked,
-- 
1.8.3.1



[PATCH] locking/hung_task: Defer showing held locks

2016-12-13 Thread Tetsuo Handa
When I was running my testcase which may block hundreds of threads
on fs locks, I got lockup due to output from debug_show_all_locks()
added by commit b2d4c2edb2e4f89a ("locking/hung_task: Show all locks").

I think we don't need to call debug_show_all_locks() on each blocked
thread. Let's defer calling debug_show_all_locks() till before panic()
or leaving for_each_process_thread() loop.

Signed-off-by: Tetsuo Handa 
Cc: Vegard Nossum 
Cc: Andrew Morton 
Cc: Linus Torvalds 
Cc: Mandeep Singh Baines 
Cc: Paul E. McKenney 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: Ingo Molnar 
---
 kernel/hung_task.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 2b59c82..a9ada5d 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -40,6 +40,7 @@
 int __read_mostly sysctl_hung_task_warnings = 10;
 
 static int __read_mostly did_panic;
+static bool hung_task_show_lock;
 
 static struct task_struct *watchdog_task;
 
@@ -116,12 +117,14 @@ static void check_hung_task(struct task_struct *t, 
unsigned long timeout)
pr_err("\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
" disables this message.\n");
sched_show_task(t);
-   debug_show_all_locks();
+   hung_task_show_lock = true;
}
 
touch_nmi_watchdog();
 
if (sysctl_hung_task_panic) {
+   if (hung_task_show_lock)
+   debug_show_all_locks();
trigger_all_cpu_backtrace();
panic("hung_task: blocked tasks");
}
@@ -168,6 +171,7 @@ static void check_hung_uninterruptible_tasks(unsigned long 
timeout)
if (test_taint(TAINT_DIE) || did_panic)
return;
 
+   hung_task_show_lock = false;
rcu_read_lock();
for_each_process_thread(g, t) {
if (!max_count--)
@@ -183,6 +187,8 @@ static void check_hung_uninterruptible_tasks(unsigned long 
timeout)
}
  unlock:
rcu_read_unlock();
+   if (hung_task_show_lock)
+   debug_show_all_locks();
 }
 
 static long hung_timeout_jiffies(unsigned long last_checked,
-- 
1.8.3.1