Re: INFO: task hung in pipe_read (2)

2020-08-13 Thread Tetsuo Handa
On 2020/08/13 16:00, Tetsuo Handa wrote:
> On 2020/08/11 4:29, Andrea Arcangeli wrote:
>> However once the mutex is killable there's no concern anymore and the
>> hangcheck timer is correct also not reporting any misbehavior anymore.
> 
> Do you mean something like below untested patch? I think that the difficult
> part is that mutex for close() operation can't become killable. And I worry
> that syzbot soon reports a hung task at pipe_release() instead of pipe_read()
> or pipe_write(). If pagefault with locks held can be avoided, there will be no
> such worry.

Hmm, the difficult part is not limited to close() operation. While some of them
are low hanging fruits, the rest seems to be subtle or complicated. Al, do you
think that we can make all pipe mutex killable?

 fs/pipe.c | 104 +++---
 fs/splice.c   |  60 +++---
 include/linux/pipe_fs_i.h |   5 +-
 3 files changed, 134 insertions(+), 35 deletions(-)

diff --git a/fs/pipe.c b/fs/pipe.c
index 60dbee457143..f21c420dc7c7 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -66,6 +66,13 @@ static void pipe_lock_nested(struct pipe_inode_info *pipe, 
int subclass)
mutex_lock_nested(&pipe->mutex, subclass);
 }
 
+static int __must_check pipe_lock_killable_nested(struct pipe_inode_info 
*pipe, int subclass)
+{
+   if (pipe->files)
+   return mutex_lock_killable_nested(&pipe->mutex, subclass);
+   return 0;
+}
+
 void pipe_lock(struct pipe_inode_info *pipe)
 {
/*
@@ -75,6 +82,14 @@ void pipe_lock(struct pipe_inode_info *pipe)
 }
 EXPORT_SYMBOL(pipe_lock);
 
+int pipe_lock_killable(struct pipe_inode_info *pipe)
+{
+   /*
+* pipe_lock() nests non-pipe inode locks (for writing to a file)
+*/
+   return pipe_lock_killable_nested(pipe, I_MUTEX_PARENT);
+}
+
 void pipe_unlock(struct pipe_inode_info *pipe)
 {
if (pipe->files)
@@ -87,23 +102,37 @@ static inline void __pipe_lock(struct pipe_inode_info 
*pipe)
mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
 }
 
+static inline int __must_check __pipe_lock_killable(struct pipe_inode_info 
*pipe)
+{
+   return mutex_lock_killable_nested(&pipe->mutex, I_MUTEX_PARENT);
+}
+
 static inline void __pipe_unlock(struct pipe_inode_info *pipe)
 {
mutex_unlock(&pipe->mutex);
 }
 
-void pipe_double_lock(struct pipe_inode_info *pipe1,
- struct pipe_inode_info *pipe2)
+int pipe_double_lock_killable(struct pipe_inode_info *pipe1,
+ struct pipe_inode_info *pipe2)
 {
BUG_ON(pipe1 == pipe2);
 
if (pipe1 < pipe2) {
-   pipe_lock_nested(pipe1, I_MUTEX_PARENT);
-   pipe_lock_nested(pipe2, I_MUTEX_CHILD);
+   if (pipe_lock_killable_nested(pipe1, I_MUTEX_PARENT))
+   return -ERESTARTSYS;
+   if (pipe_lock_killable_nested(pipe2, I_MUTEX_CHILD)) {
+   pipe_unlock(pipe1);
+   return -ERESTARTSYS;
+   }
} else {
-   pipe_lock_nested(pipe2, I_MUTEX_PARENT);
-   pipe_lock_nested(pipe1, I_MUTEX_CHILD);
+   if (pipe_lock_killable_nested(pipe2, I_MUTEX_PARENT))
+   return -ERESTARTSYS;
+   if (pipe_lock_killable_nested(pipe1, I_MUTEX_CHILD)) {
+   pipe_unlock(pipe2);
+   return -ERESTARTSYS;
+   }
}
+   return 0;
 }
 
 /* Drop the inode semaphore and wait for a pipe event, atomically */
@@ -125,6 +154,24 @@ void pipe_wait(struct pipe_inode_info *pipe)
pipe_lock(pipe);
 }
 
+int pipe_wait_killable(struct pipe_inode_info *pipe)
+{
+   DEFINE_WAIT(rdwait);
+   DEFINE_WAIT(wrwait);
+
+   /*
+* Pipes are system-local resources, so sleeping on them
+* is considered a noninteractive wait:
+*/
+   prepare_to_wait(&pipe->rd_wait, &rdwait, TASK_INTERRUPTIBLE);
+   prepare_to_wait(&pipe->wr_wait, &wrwait, TASK_INTERRUPTIBLE);
+   pipe_unlock(pipe);
+   schedule();
+   finish_wait(&pipe->rd_wait, &rdwait);
+   finish_wait(&pipe->wr_wait, &wrwait);
+   return pipe_lock_killable(pipe);
+}
+
 static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
  struct pipe_buffer *buf)
 {
@@ -244,7 +291,8 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
return 0;
 
ret = 0;
-   __pipe_lock(pipe);
+   if (__pipe_lock_killable(pipe))
+   return -ERESTARTSYS;
 
/*
 * We only wake up writers if the pipe was full when we started
@@ -381,7 +429,8 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
if (wait_event_interruptible_exclusive(pipe->rd_wait, 
pipe_readable(pipe)) < 0)
return -ERESTARTSYS;
 
-   __pipe_lock(pipe);
+   if (__pipe_lock_killable(pipe))
+

Re: INFO: task hung in pipe_read (2)

2020-08-13 Thread Tetsuo Handa
On 2020/08/11 4:29, Andrea Arcangeli wrote:
> However once the mutex is killable there's no concern anymore and the
> hangcheck timer is correct also not reporting any misbehavior anymore.

Do you mean something like below untested patch? I think that the difficult
part is that mutex for close() operation can't become killable. And I worry
that syzbot soon reports a hung task at pipe_release() instead of pipe_read()
or pipe_write(). If pagefault with locks held can be avoided, there will be no
such worry.

 fs/pipe.c | 106 ++
 fs/splice.c   |  41 --
 include/linux/pipe_fs_i.h |   5 ++-
 3 files changed, 120 insertions(+), 32 deletions(-)

diff --git a/fs/pipe.c b/fs/pipe.c
index 60dbee4..537d1ef 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -66,6 +66,13 @@ static void pipe_lock_nested(struct pipe_inode_info *pipe, 
int subclass)
mutex_lock_nested(&pipe->mutex, subclass);
 }
 
+static int __must_check pipe_lock_killable_nested(struct pipe_inode_info 
*pipe, int subclass)
+{
+   if (pipe->files)
+   return mutex_lock_killable_nested(&pipe->mutex, subclass);
+   return 0;
+}
+
 void pipe_lock(struct pipe_inode_info *pipe)
 {
/*
@@ -75,6 +82,14 @@ void pipe_lock(struct pipe_inode_info *pipe)
 }
 EXPORT_SYMBOL(pipe_lock);
 
+int pipe_lock_killable(struct pipe_inode_info *pipe)
+{
+   /*
+* pipe_lock() nests non-pipe inode locks (for writing to a file)
+*/
+   return pipe_lock_killable_nested(pipe, I_MUTEX_PARENT);
+}
+
 void pipe_unlock(struct pipe_inode_info *pipe)
 {
if (pipe->files)
@@ -87,23 +102,37 @@ static inline void __pipe_lock(struct pipe_inode_info 
*pipe)
mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
 }
 
+static inline int __must_check __pipe_lock_killable(struct pipe_inode_info 
*pipe)
+{
+   return mutex_lock_killable_nested(&pipe->mutex, I_MUTEX_PARENT);
+}
+
 static inline void __pipe_unlock(struct pipe_inode_info *pipe)
 {
mutex_unlock(&pipe->mutex);
 }
 
-void pipe_double_lock(struct pipe_inode_info *pipe1,
- struct pipe_inode_info *pipe2)
+int pipe_double_lock_killable(struct pipe_inode_info *pipe1,
+ struct pipe_inode_info *pipe2)
 {
BUG_ON(pipe1 == pipe2);
 
if (pipe1 < pipe2) {
-   pipe_lock_nested(pipe1, I_MUTEX_PARENT);
-   pipe_lock_nested(pipe2, I_MUTEX_CHILD);
+   if (pipe_lock_killable_nested(pipe1, I_MUTEX_PARENT))
+   return -ERESTARTSYS;
+   if (pipe_lock_killable_nested(pipe2, I_MUTEX_CHILD)) {
+   pipe_unlock(pipe1);
+   return -ERESTARTSYS;
+   }
} else {
-   pipe_lock_nested(pipe2, I_MUTEX_PARENT);
-   pipe_lock_nested(pipe1, I_MUTEX_CHILD);
+   if (pipe_lock_killable_nested(pipe2, I_MUTEX_PARENT))
+   return -ERESTARTSYS;
+   if (pipe_lock_killable_nested(pipe1, I_MUTEX_CHILD)) {
+   pipe_unlock(pipe2);
+   return -ERESTARTSYS;
+   }
}
+   return 0;
 }
 
 /* Drop the inode semaphore and wait for a pipe event, atomically */
@@ -125,6 +154,24 @@ void pipe_wait(struct pipe_inode_info *pipe)
pipe_lock(pipe);
 }
 
+int pipe_wait_killable(struct pipe_inode_info *pipe)
+{
+   DEFINE_WAIT(rdwait);
+   DEFINE_WAIT(wrwait);
+
+   /*
+* Pipes are system-local resources, so sleeping on them
+* is considered a noninteractive wait:
+*/
+   prepare_to_wait(&pipe->rd_wait, &rdwait, TASK_INTERRUPTIBLE);
+   prepare_to_wait(&pipe->wr_wait, &wrwait, TASK_INTERRUPTIBLE);
+   pipe_unlock(pipe);
+   schedule();
+   finish_wait(&pipe->rd_wait, &rdwait);
+   finish_wait(&pipe->wr_wait, &wrwait);
+   return pipe_lock_killable(pipe);
+}
+
 static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
  struct pipe_buffer *buf)
 {
@@ -244,7 +291,8 @@ static inline bool pipe_readable(const struct 
pipe_inode_info *pipe)
return 0;
 
ret = 0;
-   __pipe_lock(pipe);
+   if (__pipe_lock_killable(pipe))
+   return -ERESTARTSYS;
 
/*
 * We only wake up writers if the pipe was full when we started
@@ -381,7 +429,8 @@ static inline bool pipe_readable(const struct 
pipe_inode_info *pipe)
if (wait_event_interruptible_exclusive(pipe->rd_wait, 
pipe_readable(pipe)) < 0)
return -ERESTARTSYS;
 
-   __pipe_lock(pipe);
+   if (__pipe_lock_killable(pipe))
+   return -ERESTARTSYS;
was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
wake_next_reader = true;
}
@@ -432,7 +481,8 @@ static inline bool pipe_writable(const struct 
pip

Re: INFO: task hung in pipe_read (2)

2020-08-10 Thread Andrea Arcangeli
Hello Tetsuo,

On Sat, Aug 08, 2020 at 10:01:21AM +0900, Tetsuo Handa wrote:
> use of killable waits disables ability to detect possibility of deadlock 
> (because
> lockdep can't check possibility of deadlock which involves actions in 
> userspace), for
> syzkaller process is SIGKILLed after 5 seconds while khungtaskd's timeout is 
> 140 seconds.
> 
> If we encounter a deadlock in an unattended operation (e.g. some server 
> process),
> we don't have a method for resolving the deadlock. Therefore, I consider that
> t->state == TASK_UNINTERRUPTIBLE check is a bad choice. Unless a sleep is 
> neutral
> (e.g. no lock is held, or obviously safe to sleep with that specific lock 
> held),
> sleeping for 140 seconds inside the kernel is a bad sign even if 
> interruptible/killable.

Task in killable state for seconds as result of another task taking
too long to do something in kernel sounds bad, if the other task had a
legitimate reason to take a long time in normal operations, i.e. like
if the other task was just doing a getdents of a large directory.

Nobody forces any app to use userfaultfd, if an app uses it and the
other side of the pipe trusts to read from it, and it gets stuck for
seconds in uninterruptible and killable state, it's an app bug
resolvable with kill -9. We also can't enforce all signals to run in
presence of other bugs, for example if the task that won't respond to
any signal other than CONT and KILL was blocked in stopped state by a
buggy SIGSTOP. The pipe also can get stuck if the network is down and
it's swapping in from NFS and nobody is forced to take the risk of
using network attached storage as swap device either.

The hangcheck is currently correct to report a concern, because the
other side of the pipe may be another process of another user that
cannot SIGKILL the task blocked in the userfault. That sounds far
fetched and it's not particular concerning anyway, but it's not
technically impossible so I agree with the hangcheck timer reporting
an issue that needs correction.

However once the mutex is killable there's no concern anymore and the
hangcheck timer is correct also not reporting any misbehavior anymore.

Instead of userfaultfd, you can think at 100% kernel faults backed by
swapin from NFS or swapping from attached network storage or swapin
from scsi with a scsi fibre channel accidentally pulled out for a few
seconds. It's nice if uffd can survive as well as nfs or scsi would by
retrying and waiting more than 1sec.

> Can we do something like this?
> 
>   bool retried = false;
> retry:
>   lock();
>   disable_fault();
>   ret = access_memory_that_might_fault();
>   enable_fault();
>   if (ret == -EWOULDFAULT && !retried)
> goto retry_without_lock;
>   if (ret == 0)
> ret = do_something();
>   unlock();
>   return ret;
> retry_without_lock:
>   unlock();
>   ret = access_memory_that_might_fault();
>   retried = true;
>   goto retry;

This would work, but it'll make the kernel more complex than using a
killable mutex.

It'd also give a worse runtime than the killable mutex, if the only
source of blocking events while holding the mutex wouldn't be the page
fault.

With just 2 processes in this case probably it would be fine and there
likely won't be other sources of contention, so the main con is
just the code complexity to be maintained and the fact it won't
provide any measurable practical benefit, if something it'll run
slower by having to repeat the same fault in blocking and non blocking
mode.

With regard to the reporting of the hangcheck timer most modern paging
code uses killable mutex because unlike the pipe code, there can be
other sources of blockage and you don't want to wait for shared
resources to unblock a process that is waiting on a mutex. I think
trying to reduce the usage of killable mutex overall is a ship that
has sailed, it won't move the needle to just avoid it in pipe code
since it'll remain everywhere else.

So I'm certainly not against your proposal, but if we increase the
complexity like above then I'd find it more attractive if it was for
some other benefit unrelated to userfaultfd, or swapin from NFS or
network attached storage for that matter, and I don't see a big enough
benefit to justify it.

Thanks!
Andrea

PS. I'll be busy until Wed sorry if I don't answer promptly to
followups. If somebody could give a try to add the killable mutex
bailout failure paths that return to userland direct, or your more
complex alternative it'd be great.



Re: INFO: task hung in pipe_read (2)

2020-08-07 Thread Tetsuo Handa
On 2020/08/07 14:31, Andrea Arcangeli wrote:
>> Andrea? Comments? As mentioned, this is probably much too aggressive,
>> but I do think we need to limit the time that the kernel will wait for
>> page faults.
> 
> Why is the pipe code preventing SIGKILL of the task that is blocked on the
> mutex_lock? Is there any good reason for it or it simply has margin
> for improvement regardless of the hangcheck report? It'd be great if
> we can look into that before looking into the uffd specific bits.

It would be possible to use a _killable version for this specific function, but

> 
> The hangcheck timer would have zero issues with tasks that can be
> killed, if only the pipe code could be improved to use mutex_lock_killable.
> 
>   /* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
>   if (t->state == TASK_UNINTERRUPTIBLE)
>   check_hung_task(t, timeout);
> 
> The hangcheck report is just telling us one task was in D state a
> little too long, but it wasn't fatal error and the kernel wasn't
> actually destabilized and the only malfunction reported is that a task
> was unkillable for too long.

use of killable waits disables ability to detect possibility of deadlock 
(because
lockdep can't check possibility of deadlock which involves actions in 
userspace), for
syzkaller process is SIGKILLed after 5 seconds while khungtaskd's timeout is 
140 seconds.

If we encounter a deadlock in an unattended operation (e.g. some server 
process),
we don't have a method for resolving the deadlock. Therefore, I consider that
t->state == TASK_UNINTERRUPTIBLE check is a bad choice. Unless a sleep is 
neutral
(e.g. no lock is held, or obviously safe to sleep with that specific lock held),
sleeping for 140 seconds inside the kernel is a bad sign even if 
interruptible/killable.

> 
> Now if it's impossible to improve the pipe code so it works better not
> just for uffd, there's still no reason to worry: we could disable uffd
> in the pipe context. For example ptrace opts-out of uffds, so that gdb
> doesn't get stuck if you read a pointer that should be handled by the
> process that is under debug. I hope it won't be necessary but it
> wouldn't be a major issue, certainly it wouldn't risk breaking qemu
> (and non-cooperative APIs are privileged so it could still skip the
> timeout).

Can we do something like this?

  bool retried = false;
retry:
  lock();
  disable_fault();
  ret = access_memory_that_might_fault();
  enable_fault();
  if (ret == -EWOULDFAULT && !retried)
goto retry_without_lock;
  if (ret == 0)
ret = do_something();
  unlock();
  return ret;
retry_without_lock:
  unlock();
  ret = access_memory_that_might_fault();
  retried = true;
  goto retry;



Re: INFO: task hung in pipe_read (2)

2020-08-06 Thread Andrea Arcangeli
Hello!

On Sat, Aug 01, 2020 at 10:39:00AM -0700, Linus Torvalds wrote:
> On Sat, Aug 1, 2020 at 8:30 AM Tetsuo Handa
>  wrote:
> >
> > Waiting for response at 
> > https://lkml.kernel.org/r/45a9b2c8-d0b7-8f00-5b30-0cfe3e028...@i-love.sakura.ne.jp
> >  .
> 
> I think handle_userfault() should have a (shortish) timeout, and just
> return VM_FAULT_RETRY.

The 1sec timeout if applied only to kernel faults (not the case yet
but it'd be enough to solve the hangcheck timer), will work perfectly
for Android, but it will break qemu.

[  916.954313] INFO: task syz-executor.0:61593 blocked for more than 40 seconds.

If you want to enforce a timeout, 40 seconds or something of the order
of the hangcheck timer would be more reasonable.

1sec is of the same order of magnitude of latency that you'd get with
an host kernel upgrade in place with kexec (with the guest memory
being preserved in RAM) that you'd suffer occasionally from in most public 
clouds.

So postcopy live migration should be allowed to take 1 sec latency and
it shouldn't become a deal breaker, that results in the VM getting killed.

> The code is overly complex anyway, because it predates the "just return 
> RETRY".
> 
> And because we can't wait forever when the source of the fault is a
> kernel exception, I think we should add some extra logic to just say
> "if this is a retry, we've already done this once, just return an
> error".

Until the uffp-wp was merged recently, we never needed more than one
VM_FAULT_RETRY to handle uffd-missing faults, you seem to want to go
back to that which again would be fine for uffd-missing faults.

I haven't had time to read and test the testcase properly yet, but at
first glance from reading the hangcheck report it looks like there
would be just one userfault? So I don't see an immediate connection.

The change adding a 1sec timeout would definitely fix this issue, but
it'll also break qemu and probably the vast majority of the users.

> This is a TEST PATCH ONLY. I think we'll actually have to do something
> like this, but I think the final version might need to allow a couple
> of retries, rather than just give up after just one second.
> 
> But for testing your case, this patch might be enough to at least show
> that "yeah, this kind of approach works".
> 
> Andrea? Comments? As mentioned, this is probably much too aggressive,
> but I do think we need to limit the time that the kernel will wait for
> page faults.

Why is the pipe code preventing SIGKILL of the task that is blocked on the
mutex_lock? Is there any good reason for it or it simply has margin
for improvement regardless of the hangcheck report? It'd be great if
we can look into that before looking into the uffd specific bits.

The hangcheck timer would have zero issues with tasks that can be
killed, if only the pipe code could be improved to use mutex_lock_killable.

/* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
if (t->state == TASK_UNINTERRUPTIBLE)
check_hung_task(t, timeout);

The hangcheck report is just telling us one task was in D state a
little too long, but it wasn't fatal error and the kernel wasn't
actually destabilized and the only malfunction reported is that a task
was unkillable for too long.

Now if it's impossible to improve the pipe code so it works better not
just for uffd, there's still no reason to worry: we could disable uffd
in the pipe context. For example ptrace opts-out of uffds, so that gdb
doesn't get stuck if you read a pointer that should be handled by the
process that is under debug. I hope it won't be necessary but it
wouldn't be a major issue, certainly it wouldn't risk breaking qemu
(and non-cooperative APIs are privileged so it could still skip the
timeout).

> Because userfaultfd has become a huge source of security holes as a
> way to time kernel faults or delay them indefinitely.

I assume you refer to the below:

https://duasynt.com/blog/cve-2016-6187-heap-off-by-one-exploit
https://blog.lizzie.io/using-userfaultfd.html

These reports don't happen to mention CONFIG_SLAB_FREELIST_RANDOM=y
which is already enabled by default in enterprise kernels for a reason
and they don't mention the more recent CONFIG_SLAB_FREELIST_HARDENED
and CONFIG_SHUFFLE_PAGE_ALLOCATOR.

Can they test it with those options enabled again, does it still work
so good or not anymore? That would be very helpful to know.

Randomizing which is the next page that gets allocated is much more
important than worrying about uffd because if you removed uffd you may
still have other ways to temporarily stop the page fault depending on
the setup. Example:

https://bugs.chromium.org/p/project-zero/issues/detail?id=808

The above one doesn't use uffd, but it uses fuse. So is fuse also a
source of security holes given they even use it for the exploit in a
preferential way instead of uffd?

"This can be done by abusing the writev() syscall and FUSE: The
attacker mounts a FUSE filesystem that artificia

Re: INFO: task hung in pipe_read (2)

2020-08-01 Thread Linus Torvalds
On Sat, Aug 1, 2020 at 8:30 AM Tetsuo Handa
 wrote:
>
> Waiting for response at 
> https://lkml.kernel.org/r/45a9b2c8-d0b7-8f00-5b30-0cfe3e028...@i-love.sakura.ne.jp
>  .

I think handle_userfault() should have a (shortish) timeout, and just
return VM_FAULT_RETRY.

The code is overly complex anyway, because it predates the "just return RETRY".

And because we can't wait forever when the source of the fault is a
kernel exception, I think we should add some extra logic to just say
"if this is a retry, we've already done this once, just return an
error".

This is a TEST PATCH ONLY. I think we'll actually have to do something
like this, but I think the final version might need to allow a couple
of retries, rather than just give up after just one second.

But for testing your case, this patch might be enough to at least show
that "yeah, this kind of approach works".

Andrea? Comments? As mentioned, this is probably much too aggressive,
but I do think we need to limit the time that the kernel will wait for
page faults.

Because userfaultfd has become a huge source of security holes as a
way to time kernel faults or delay them indefinitely.

 Linus


patch
Description: Binary data


Re: INFO: task hung in pipe_read (2)

2020-08-01 Thread Tetsuo Handa
Waiting for response at 
https://lkml.kernel.org/r/45a9b2c8-d0b7-8f00-5b30-0cfe3e028...@i-love.sakura.ne.jp
 .

#syz dup: INFO: task hung in pipe_write (4)



INFO: task hung in pipe_read (2)

2020-08-01 Thread syzbot
Hello,

syzbot found the following issue on:

HEAD commit:01830e6c Add linux-next specific files for 20200731
git tree:   linux-next
console output: https://syzkaller.appspot.com/x/log.txt?x=11b922e090
kernel config:  https://syzkaller.appspot.com/x/.config?x=2e226b2d1364112c
dashboard link: https://syzkaller.appspot.com/bug?extid=96cc7aba7e969b1d305c
compiler:   gcc (GCC) 10.1.0-syz 20200507
syz repro:  https://syzkaller.appspot.com/x/repro.syz?x=140e5d5c90

IMPORTANT: if you fix the issue, please add the following tag to the commit:
Reported-by: syzbot+96cc7aba7e969b1d3...@syzkaller.appspotmail.com

INFO: task syz-execprog:6857 blocked for more than 143 seconds.
  Not tainted 5.8.0-rc7-next-20200731-syzkaller #0
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
syz-execprogD27640  6857   6837 0x4000
Call Trace:
 context_switch kernel/sched/core.c:3669 [inline]
 __schedule+0x8e5/0x21e0 kernel/sched/core.c:4418
 schedule+0xd0/0x2a0 kernel/sched/core.c:4493
 schedule_preempt_disabled+0xf/0x20 kernel/sched/core.c:4552
 __mutex_lock_common kernel/locking/mutex.c:1033 [inline]
 __mutex_lock+0x3e2/0x10e0 kernel/locking/mutex.c:1103
 __pipe_lock fs/pipe.c:87 [inline]
 pipe_read+0x136/0x13d0 fs/pipe.c:247
 call_read_iter include/linux/fs.h:1870 [inline]
 new_sync_read+0x5b3/0x6e0 fs/read_write.c:414
 vfs_read+0x383/0x5a0 fs/read_write.c:493
 ksys_read+0x1ee/0x250 fs/read_write.c:624
 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46
 entry_SYSCALL_64_after_hwframe+0x44/0xa9
RIP: 0033:0x4ad88b
Code: Bad RIP value.
RSP: 002b:00c2ae10 EFLAGS: 0202 ORIG_RAX: 
RAX: ffda RBX: 00c20800 RCX: 004ad88b
RDX: 0001 RSI: 00c00039 RDI: 0008
RBP: 00c2ae60 R08: 0001 R09: 0002
R10: 00c00038 R11: 0202 R12: 0003
R13: 00c82a80 R14: 00c000310600 R15: 
INFO: task syz-executor.0:17080 blocked for more than 143 seconds.
  Not tainted 5.8.0-rc7-next-20200731-syzkaller #0
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
syz-executor.0  D29144 17080  16608 0x
Call Trace:
 context_switch kernel/sched/core.c:3669 [inline]
 __schedule+0x8e5/0x21e0 kernel/sched/core.c:4418
 schedule+0xd0/0x2a0 kernel/sched/core.c:4493
 schedule_preempt_disabled+0xf/0x20 kernel/sched/core.c:4552
 __mutex_lock_common kernel/locking/mutex.c:1033 [inline]
 __mutex_lock+0x3e2/0x10e0 kernel/locking/mutex.c:1103
 __pipe_lock fs/pipe.c:87 [inline]
 pipe_write+0x12c/0x16c0 fs/pipe.c:435
 call_write_iter include/linux/fs.h:1876 [inline]
 new_sync_write+0x422/0x650 fs/read_write.c:515
 vfs_write+0x5ad/0x730 fs/read_write.c:595
 ksys_write+0x1ee/0x250 fs/read_write.c:648
 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46
 entry_SYSCALL_64_after_hwframe+0x44/0xa9
RIP: 0033:0x45cc79
Code: Bad RIP value.
RSP: 002b:7fff6c963cf8 EFLAGS: 0246 ORIG_RAX: 0001
RAX: ffda RBX: 00037d40 RCX: 0045cc79
RDX: 0208e24b RSI: 2040 RDI: 
RBP: 0078bf40 R08:  R09: 
R10:  R11: 0246 R12: 00790378
R13:  R14: 0df5 R15: 0078bf0c
INFO: task syz-executor.0:17140 blocked for more than 144 seconds.
  Not tainted 5.8.0-rc7-next-20200731-syzkaller #0
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
syz-executor.0  D29144 17140  16608 0x
Call Trace:
 context_switch kernel/sched/core.c:3669 [inline]
 __schedule+0x8e5/0x21e0 kernel/sched/core.c:4418
 schedule+0xd0/0x2a0 kernel/sched/core.c:4493
 schedule_preempt_disabled+0xf/0x20 kernel/sched/core.c:4552
 __mutex_lock_common kernel/locking/mutex.c:1033 [inline]
 __mutex_lock+0x3e2/0x10e0 kernel/locking/mutex.c:1103
 __pipe_lock fs/pipe.c:87 [inline]
 pipe_write+0x12c/0x16c0 fs/pipe.c:435
 call_write_iter include/linux/fs.h:1876 [inline]
 new_sync_write+0x422/0x650 fs/read_write.c:515
 vfs_write+0x5ad/0x730 fs/read_write.c:595
 ksys_write+0x1ee/0x250 fs/read_write.c:648
 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46
 entry_SYSCALL_64_after_hwframe+0x44/0xa9
RIP: 0033:0x45cc79
Code: Bad RIP value.
RSP: 002b:7fff6c963cf8 EFLAGS: 0246 ORIG_RAX: 0001
RAX: ffda RBX: 00037d40 RCX: 0045cc79
RDX: 0208e24b RSI: 2040 RDI: 
RBP: 0078bf40 R08:  R09: 
R10:  R11: 0246 R12: 00790378
R13:  R14: 0df5 R15: 0078bf0c
INFO: task syz-executor.0:17145 blocked for more than 145 seconds.
  Not tainted 5.8.0-rc7-next-20200731-syzkaller #0
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
syz-executor.0  D29144 17145  16608 0x
Call Trace:
 context_swi