Re: possible deadlock in start_this_handle (2)

2021-03-20 Thread Tetsuo Handa
Peter, would you answer to 6 questions listed below?

Below is a reproducer kernel module (prefixed with "my_" for distinction) for
https://syzkaller.appspot.com/bug?id=38c060d5757cbc13fdffd46e80557c645fbe79ba .

-- test.c --
#include <linux/module.h>
#include <linux/slab.h>
#include <../fs/ext4/ext4.h>

static struct lockdep_map my__fs_reclaim_map =
STATIC_LOCKDEP_MAP_INIT("my_fs_reclaim", &my__fs_reclaim_map);

static noinline void my_journal_init_common(journal_t *journal)
{
static struct lock_class_key my_jbd2_trans_commit_key;

lockdep_init_map(&journal->j_trans_commit_map, "my_jbd2_handle",
&my_jbd2_trans_commit_key, 0);
}

static noinline void my_init_once(void *foo)
{
struct ext4_inode_info *my_ei = (struct ext4_inode_info *) foo;

init_rwsem(&my_ei->xattr_sem);
}

static int __init lockdep_test_init(void)
{
journal_t *journal1 = kzalloc(sizeof(*journal1), GFP_KERNEL | 
__GFP_NOFAIL);
struct ext4_inode_info *ei1 = kzalloc(sizeof(*ei1), GFP_KERNEL | 
__GFP_NOFAIL);
struct ext4_inode_info *ei2 = kzalloc(sizeof(*ei2), GFP_KERNEL | 
__GFP_NOFAIL);

my_journal_init_common(journal1);
my_init_once(ei1);
my_init_once(ei2);

rwsem_acquire_read(&journal1->j_trans_commit_map, 0, 0, _THIS_IP_);
down_write(&ei1->xattr_sem);
up_write(&ei1->xattr_sem);
rwsem_release(&journal1->j_trans_commit_map, _THIS_IP_);

down_write(&ei2->xattr_sem);
lock_map_acquire(&my__fs_reclaim_map);
lock_map_release(&my__fs_reclaim_map);
up_write(&ei2->xattr_sem);

lock_map_acquire(&my__fs_reclaim_map);
rwsem_acquire_read(&journal1->j_trans_commit_map, 0, 0, _THIS_IP_);
rwsem_release(&journal1->j_trans_commit_map, _THIS_IP_);
lock_map_release(&my__fs_reclaim_map);

return -EINVAL;
}

module_init(lockdep_test_init);
MODULE_LICENSE("GPL");
-- test.c --

-- dmesg --
[   32.938906][ T2776] test: loading out-of-tree module taints kernel.
[   32.940200][ T2776]
[   32.940240][ T2776] ==
[   32.940306][ T2776] WARNING: possible circular locking dependency detected
[   32.940373][ T2776] 5.12.0-rc3+ #846 Tainted: G   O
[   32.940434][ T2776] --
[   32.940500][ T2776] insmod/2776 is trying to acquire lock:
[   32.940557][ T2776] 9f1d438d98e0 (my_jbd2_handle){.+.+}-{0:0}, at: 
lockdep_test_init+0x0/0x1000 [test]
[   32.940651][ T2776]
[   32.940651][ T2776] but task is already holding lock:
[   32.940719][ T2776] c0631020 (my_fs_reclaim){+.+.}-{0:0}, at: 
lockdep_test_init+0x0/0x1000 [test]
[   32.940808][ T2776]
[   32.940808][ T2776] which lock already depends on the new lock.
[   32.940808][ T2776]
[   32.940897][ T2776]
[   32.940897][ T2776] the existing dependency chain (in reverse order) is:
[   32.940976][ T2776]
[   32.940976][ T2776] -> #2 (my_fs_reclaim){+.+.}-{0:0}:
[   32.941053][ T2776]lock_acquire+0xb3/0x380
[   32.941112][ T2776]lockdep_test_init+0xe2/0x1000 [test]
[   32.941176][ T2776]do_one_initcall+0x58/0x2c0
[   32.941234][ T2776]do_init_module+0x5b/0x210
[   32.941291][ T2776]load_module+0x1884/0x19a0
[   32.941347][ T2776]__do_sys_finit_module+0xc1/0x120
[   32.941408][ T2776]__x64_sys_finit_module+0x15/0x20
[   32.941469][ T2776]do_syscall_64+0x31/0x40
[   32.941527][ T2776]entry_SYSCALL_64_after_hwframe+0x44/0xae
[   32.941594][ T2776]
[   32.941594][ T2776] -> #1 (_ei->xattr_sem){+.+.}-{3:3}:
[   32.941673][ T2776]lock_acquire+0xb3/0x380
[   32.941728][ T2776]down_write+0x52/0x620
[   32.941783][ T2776]lockdep_test_init+0xa2/0x1000 [test]
[   32.941846][ T2776]do_one_initcall+0x58/0x2c0
[   32.941904][ T2776]do_init_module+0x5b/0x210
[   32.941960][ T2776]load_module+0x1884/0x19a0
[   32.942016][ T2776]__do_sys_finit_module+0xc1/0x120
[   32.942077][ T2776]__x64_sys_finit_module+0x15/0x20
[   32.942137][ T2776]do_syscall_64+0x31/0x40
[   32.942193][ T2776]entry_SYSCALL_64_after_hwframe+0x44/0xae
[   32.942260][ T2776]
[   32.942260][ T2776] -> #0 (my_jbd2_handle){.+.+}-{0:0}:
[   32.942336][ T2776]check_prevs_add+0x16a/0x1040
[   32.942395][ T2776]__lock_acquire+0x118b/0x1580
[   32.942453][ T2776]lock_acquire+0xb3/0x380
[   32.942509][ T2776]lockdep_test_init+0x140/0x1000 [test]
[   32.942574][ T2776]do_one_initcall+0x58/0x2c0
[   32.942631][ T2776]do_init_module+0x5b/0x210
[   32.942687][ T2776]load_module+0x1884/0x19a0
[   32.942743][ T2776]__do_sys_finit_module+0xc1/0x120
[   32.942803][ T2776]__x64_sys_finit_module+0x15/0x20
[   32.942915][ T2776]do_syscall_64+0x31/0x40
[   32.942973][ T2776]entry_SYSCALL_64_after_hwframe+0x44/0xae
[   32.943041][ T2776]
[   32.943041][ T2776] other info that might help us debug this:
[   32.943041][ T2776]
[   

Re: possible deadlock in start_this_handle (2)

2021-02-19 Thread harshad shirwadkar
On Fri, Feb 19, 2021 at 2:20 AM Tetsuo Handa
 wrote:
>
> On 2021/02/15 23:29, Jan Kara wrote:
> > On Mon 15-02-21 23:06:15, Tetsuo Handa wrote:
> >> On 2021/02/15 21:45, Jan Kara wrote:
> >>> On Sat 13-02-21 23:26:37, Tetsuo Handa wrote:
>  Excuse me, but it seems to me that nothing prevents
>  ext4_xattr_set_handle() from reaching ext4_xattr_inode_lookup_create()
>  without memalloc_nofs_save() when hitting ext4_get_nojournal() path.
>  Will you explain when ext4_get_nojournal() path is executed?
> >>>
> >>> That's a good question but sadly I don't think that's it.
> >>> ext4_get_nojournal() is called when the filesystem is created without a
> >>> journal. In that case we also don't acquire jbd2_handle lockdep map. In 
> >>> the
> >>> syzbot report we can see:
> >>
> >> Since syzbot can test filesystem images, syzbot might have tested a 
> >> filesystem
> >> image created both with and without journal within this boot.
> >
> > a) I think that syzbot reboots the VM between executing different tests to
> > get reproducible conditions. But in theory I agree the test may have
> > contained one image with and one image without a journal.
>
> syzkaller reboots the VM upon a crash.
> syzkaller can test multiple filesystem images within one boot.
>
> https://storage.googleapis.com/syzkaller/cover/ci-qemu-upstream-386.html (this
> statistic snapshot is volatile) reports that ext4_get_nojournal() is 
> partially covered
> ( https://github.com/google/syzkaller/blob/master/docs/coverage.md ) by 
> syzkaller.
>
>   /* Just increment the non-pointer handle value */
>   static handle_t *ext4_get_nojournal(void)
>   {
>86 handle_t *handle = current->journal_info;
>   unsigned long ref_cnt = (unsigned long)handle;
>
>   BUG_ON(ref_cnt >= EXT4_NOJOURNAL_MAX_REF_COUNT);
>
>86 ref_cnt++;
>   handle = (handle_t *)ref_cnt;
>
>   current->journal_info = handle;
>  2006 return handle;
>   }
>
>
>   /* Decrement the non-pointer handle value */
>   static void ext4_put_nojournal(handle_t *handle)
>   {
>   unsigned long ref_cnt = (unsigned long)handle;
>
>85 BUG_ON(ref_cnt == 0);
>
>85 ref_cnt--;
>   handle = (handle_t *)ref_cnt;
>
>   current->journal_info = handle;
>   }
>
>
>   handle_t *__ext4_journal_start_sb(struct super_block *sb, unsigned int 
> line,
> int type, int blocks, int rsv_blocks,
> int revoke_creds)
>   {
>   journal_t *journal;
>   int err;
>
>  2006 trace_ext4_journal_start(sb, blocks, rsv_blocks, revoke_creds,
>  2006  _RET_IP_);
>  2006 err = ext4_journal_check_start(sb);
>   if (err < 0)
>   return ERR_PTR(err);
>
>  2005 journal = EXT4_SB(sb)->s_journal;
>  1969 if (!journal || (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
>  2006 return ext4_get_nojournal();
>  1969 return jbd2__journal_start(journal, blocks, rsv_blocks, 
> revoke_creds,
>  GFP_NOFS, type, line);
>   }
>
> >
> > *but*
> >
> > b) as I wrote in the email you are replying to, the jbd2_handle key is
> > private per filesystem. Thus for lockdep to complain about
> > jbd2_handle->fs_reclaim->jbd2_handle deadlock, those jbd2_handle lockdep
> > maps must come from the same filesystem.
> >
> > *and*
> >
> > c) filesystem without journal doesn't use jbd2_handle lockdep map at all so
> > for such filesystems lockdep creates no dependency for jbd2_handle map.
> >
>
> What about "EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)" case?
> Does this case happen on filesystem with journal, and can this case be 
> executed
> by fuzzing a crafted (a sort of erroneous) filesystem with journal, and are
> the jbd2_handle for calling ext4_get_nojournal() case and the jbd2_handle for
> calling jbd2__journal_start() case the same?
EXT4_FC_REPLAY is a mount state that is only set during jbd2 journal
recovery. The only way for jbd2 journal recovery to set EXT4_FC_REPLAY
option is if after a journal crash there are special fast_commit
blocks present in the journal. For these fast_commit blocks to be
present in the journal, the Ext4 file system prior to crash should
have had "fast_commit" feature enabled.

If we have a way to look at the Ext4 partition that syzbot used for
reporting this bug, it is very easy to see if this FC_REPLAY will ever
be set or not. Just running "debugfs <device>" and inside debugfs,
running logdump will show us if there are any fast commit blocks
present in the journal.

Having said that, I have following reason to believe that this option
wasn't set during the syzbot failure:

EXT4_FC_REPLAY will only be set during journal recovery and is cleared
immediately after. Which means EXT4_FC_REPLAY 

Re: possible deadlock in start_this_handle (2)

2021-02-19 Thread Tetsuo Handa
On 2021/02/15 23:29, Jan Kara wrote:
> On Mon 15-02-21 23:06:15, Tetsuo Handa wrote:
>> On 2021/02/15 21:45, Jan Kara wrote:
>>> On Sat 13-02-21 23:26:37, Tetsuo Handa wrote:
 Excuse me, but it seems to me that nothing prevents
 ext4_xattr_set_handle() from reaching ext4_xattr_inode_lookup_create()
 without memalloc_nofs_save() when hitting ext4_get_nojournal() path.
 Will you explain when ext4_get_nojournal() path is executed?
>>>
>>> That's a good question but sadly I don't think that's it.
>>> ext4_get_nojournal() is called when the filesystem is created without a
>>> journal. In that case we also don't acquire jbd2_handle lockdep map. In the
>>> syzbot report we can see:
>>
>> Since syzbot can test filesystem images, syzbot might have tested a 
>> filesystem
>> image created both with and without journal within this boot.
> 
> a) I think that syzbot reboots the VM between executing different tests to
> get reproducible conditions. But in theory I agree the test may have
> contained one image with and one image without a journal.

syzkaller reboots the VM upon a crash.
syzkaller can test multiple filesystem images within one boot.

https://storage.googleapis.com/syzkaller/cover/ci-qemu-upstream-386.html (this
statistic snapshot is volatile) reports that ext4_get_nojournal() is partially 
covered
( https://github.com/google/syzkaller/blob/master/docs/coverage.md ) by 
syzkaller.

  /* Just increment the non-pointer handle value */
  static handle_t *ext4_get_nojournal(void)
  {
   86 handle_t *handle = current->journal_info;
  unsigned long ref_cnt = (unsigned long)handle;
  
  BUG_ON(ref_cnt >= EXT4_NOJOURNAL_MAX_REF_COUNT);
  
   86 ref_cnt++;
  handle = (handle_t *)ref_cnt;
  
  current->journal_info = handle;
 2006 return handle;
  }
  
  
  /* Decrement the non-pointer handle value */
  static void ext4_put_nojournal(handle_t *handle)
  {
  unsigned long ref_cnt = (unsigned long)handle;
  
   85 BUG_ON(ref_cnt == 0);
  
   85 ref_cnt--;
  handle = (handle_t *)ref_cnt;
  
  current->journal_info = handle;
  }


  handle_t *__ext4_journal_start_sb(struct super_block *sb, unsigned int 
line,
int type, int blocks, int rsv_blocks,
int revoke_creds)
  {
  journal_t *journal;
  int err;
  
 2006 trace_ext4_journal_start(sb, blocks, rsv_blocks, revoke_creds,
 2006  _RET_IP_);
 2006 err = ext4_journal_check_start(sb);
  if (err < 0)
  return ERR_PTR(err);
  
 2005 journal = EXT4_SB(sb)->s_journal;
 1969 if (!journal || (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
 2006 return ext4_get_nojournal();
 1969 return jbd2__journal_start(journal, blocks, rsv_blocks, 
revoke_creds,
 GFP_NOFS, type, line);
  }

> 
> *but*
> 
> b) as I wrote in the email you are replying to, the jbd2_handle key is
> private per filesystem. Thus for lockdep to complain about
> jbd2_handle->fs_reclaim->jbd2_handle deadlock, those jbd2_handle lockdep
> maps must come from the same filesystem.
> 
> *and*
> 
> c) filesystem without journal doesn't use jbd2_handle lockdep map at all so
> for such filesystems lockdep creates no dependency for jbd2_handle map.
> 

What about "EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)" case?
Does this case happen on filesystem with journal, and can this case be executed
by fuzzing a crafted (a sort of erroneous) filesystem with journal, and are
the jbd2_handle for calling ext4_get_nojournal() case and the jbd2_handle for
calling jbd2__journal_start() case the same?

Also, I worry that jbd2__journal_restart() is problematic, for it calls
stop_this_handle() (which calls memalloc_nofs_restore()) and then calls
start_this_handle() (which fails to call memalloc_nofs_save() if an error
occurs). An error from start_this_handle() from journal restart operation
might need special handling (i.e. either remount-ro or panic) ?



Re: possible deadlock in start_this_handle (2)

2021-02-15 Thread Jan Kara
On Mon 15-02-21 23:06:15, Tetsuo Handa wrote:
> On 2021/02/15 21:45, Jan Kara wrote:
> > On Sat 13-02-21 23:26:37, Tetsuo Handa wrote:
> >> Excuse me, but it seems to me that nothing prevents
> >> ext4_xattr_set_handle() from reaching ext4_xattr_inode_lookup_create()
> >> without memalloc_nofs_save() when hitting ext4_get_nojournal() path.
> >> Will you explain when ext4_get_nojournal() path is executed?
> > 
> > That's a good question but sadly I don't think that's it.
> > ext4_get_nojournal() is called when the filesystem is created without a
> > journal. In that case we also don't acquire jbd2_handle lockdep map. In the
> > syzbot report we can see:
> 
> Since syzbot can test filesystem images, syzbot might have tested a filesystem
> image created both with and without journal within this boot.

a) I think that syzbot reboots the VM between executing different tests to
get reproducible conditions. But in theory I agree the test may have
contained one image with and one image without a journal.

*but*

b) as I wrote in the email you are replying to, the jbd2_handle key is
private per filesystem. Thus for lockdep to complain about
jbd2_handle->fs_reclaim->jbd2_handle deadlock, those jbd2_handle lockdep
maps must come from the same filesystem.

*and*

c) filesystem without journal doesn't use jbd2_handle lockdep map at all so
for such filesystems lockdep creates no dependency for jbd2_handle map.

Honza

> 
> > 
> > kswapd0/2246 is trying to acquire lock:
> > 888041a988e0 (jbd2_handle){}-{0:0}, at: 
> > start_this_handle+0xf81/0x1380 fs/jbd2/transaction.c:444
> > 
> > but task is already holding lock:
> > 8be892c0 (fs_reclaim){+.+.}-{0:0}, at: 
> > __fs_reclaim_acquire+0x0/0x30 mm/page_alloc.c:5195
> > 
> > So this filesystem has very clearly been created with a journal. Also the
> > journal lockdep tracking machinery uses:
> 
> While locks held by kswapd0/2246 are fs_reclaim, shrinker_rwsem, 
> &type->s_umount_key#38
> and jbd2_handle, isn't the dependency lockdep considers problematic is
> 
>   Chain exists of:
> jbd2_handle --> &ei->xattr_sem --> fs_reclaim
> 
>Possible unsafe locking scenario:
> 
>  CPU0CPU1
>  
> lock(fs_reclaim);
>  lock(&ei->xattr_sem);
>  lock(fs_reclaim);
> lock(jbd2_handle);
> 
> where CPU0 is kswapd/2246 and CPU1 is the case of ext4_get_nojournal() path?
> If someone has taken jbd2_handle and &ei->xattr_sem in this order, isn't this
> dependency true?
> 
> > 
> > rwsem_acquire_read(&journal->j_trans_commit_map, 0, 0, _THIS_IP_);
> > 
> > so a lockdep key is per-filesystem. Thus it is not possible that lockdep
> > would combine lock dependencies from two different filesystems.
> > 
> > But I guess we could narrow the search for this problem by adding WARN_ONs
> > to ext4_xattr_set_handle() and ext4_xattr_inode_lookup_create() like:
> > 
> > WARN_ON(ext4_handle_valid(handle) && !(current->flags & PF_MEMALLOC_NOFS));
> > 
> > It would narrow down a place in which PF_MEMALLOC_NOFS flag isn't set
> > properly... At least that seems like the most plausible way forward to me.
> 
> You can use CONFIG_DEBUG_AID_FOR_SYZBOT for adding such WARN_ONs on 
> linux-next.
> 
-- 
Jan Kara 
SUSE Labs, CR


Re: possible deadlock in start_this_handle (2)

2021-02-15 Thread Tetsuo Handa
On 2021/02/15 21:45, Jan Kara wrote:
> On Sat 13-02-21 23:26:37, Tetsuo Handa wrote:
>> Excuse me, but it seems to me that nothing prevents
>> ext4_xattr_set_handle() from reaching ext4_xattr_inode_lookup_create()
>> without memalloc_nofs_save() when hitting ext4_get_nojournal() path.
>> Will you explain when ext4_get_nojournal() path is executed?
> 
> That's a good question but sadly I don't think that's it.
> ext4_get_nojournal() is called when the filesystem is created without a
> journal. In that case we also don't acquire jbd2_handle lockdep map. In the
> syzbot report we can see:

Since syzbot can test filesystem images, syzbot might have tested a filesystem
image created both with and without journal within this boot.

> 
> kswapd0/2246 is trying to acquire lock:
> 888041a988e0 (jbd2_handle){}-{0:0}, at: 
> start_this_handle+0xf81/0x1380 fs/jbd2/transaction.c:444
> 
> but task is already holding lock:
> 8be892c0 (fs_reclaim){+.+.}-{0:0}, at: __fs_reclaim_acquire+0x0/0x30 
> mm/page_alloc.c:5195
> 
> So this filesystem has very clearly been created with a journal. Also the
> journal lockdep tracking machinery uses:

While locks held by kswapd0/2246 are fs_reclaim, shrinker_rwsem, 
&type->s_umount_key#38
and jbd2_handle, isn't the dependency lockdep considers problematic is

  Chain exists of:
jbd2_handle --> &ei->xattr_sem --> fs_reclaim

   Possible unsafe locking scenario:

 CPU0CPU1
 
lock(fs_reclaim);
 lock(&ei->xattr_sem);
 lock(fs_reclaim);
lock(jbd2_handle);

where CPU0 is kswapd/2246 and CPU1 is the case of ext4_get_nojournal() path?
If someone has taken jbd2_handle and &ei->xattr_sem in this order, isn't this
dependency true?

> 
> rwsem_acquire_read(&journal->j_trans_commit_map, 0, 0, _THIS_IP_);
> 
> so a lockdep key is per-filesystem. Thus it is not possible that lockdep
> would combine lock dependencies from two different filesystems.
> 
> But I guess we could narrow the search for this problem by adding WARN_ONs
> to ext4_xattr_set_handle() and ext4_xattr_inode_lookup_create() like:
> 
> WARN_ON(ext4_handle_valid(handle) && !(current->flags & PF_MEMALLOC_NOFS));
> 
> It would narrow down a place in which PF_MEMALLOC_NOFS flag isn't set
> properly... At least that seems like the most plausible way forward to me.

You can use CONFIG_DEBUG_AID_FOR_SYZBOT for adding such WARN_ONs on linux-next.



Re: possible deadlock in start_this_handle (2)

2021-02-15 Thread Jan Kara
On Sat 13-02-21 23:26:37, Tetsuo Handa wrote:
> On 2021/02/11 19:49, Jan Kara wrote:
> > This stacktrace should never happen. ext4_xattr_set() starts a transaction.
> > That internally goes through start_this_handle() which calls:
> > 
> > handle->saved_alloc_context = memalloc_nofs_save();
> > 
> > and we restore the allocation context only in stop_this_handle() when
> > stopping the handle. And with this fs_reclaim_acquire() should remove
> > __GFP_FS from the mask and not call __fs_reclaim_acquire().
> 
> Excuse me, but it seems to me that nothing prevents
> ext4_xattr_set_handle() from reaching ext4_xattr_inode_lookup_create()
> without memalloc_nofs_save() when hitting ext4_get_nojournal() path.
> Will you explain when ext4_get_nojournal() path is executed?

That's a good question but sadly I don't think that's it.
ext4_get_nojournal() is called when the filesystem is created without a
journal. In that case we also don't acquire jbd2_handle lockdep map. In the
syzbot report we can see:

kswapd0/2246 is trying to acquire lock:
888041a988e0 (jbd2_handle){}-{0:0}, at: start_this_handle+0xf81/0x1380 
fs/jbd2/transaction.c:444

but task is already holding lock:
8be892c0 (fs_reclaim){+.+.}-{0:0}, at: __fs_reclaim_acquire+0x0/0x30 
mm/page_alloc.c:5195

So this filesystem has very clearly been created with a journal. Also the
journal lockdep tracking machinery uses:

rwsem_acquire_read(&journal->j_trans_commit_map, 0, 0, _THIS_IP_);

so a lockdep key is per-filesystem. Thus it is not possible that lockdep
would combine lock dependencies from two different filesystems.

But I guess we could narrow the search for this problem by adding WARN_ONs
to ext4_xattr_set_handle() and ext4_xattr_inode_lookup_create() like:

WARN_ON(ext4_handle_valid(handle) && !(current->flags & PF_MEMALLOC_NOFS));

It would narrow down a place in which PF_MEMALLOC_NOFS flag isn't set
properly... At least that seems like the most plausible way forward to me.

Honza

> 
> ext4_xattr_set() {
>   handle = ext4_journal_start(inode, EXT4_HT_XATTR, credits) == 
> __ext4_journal_start() {
>   return __ext4_journal_start_sb() {
> journal = EXT4_SB(sb)->s_journal;
> if (!journal || (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
>   return ext4_get_nojournal(); // Never calls memalloc_nofs_save() 
> despite returning !IS_ERR() value.
> return jbd2__journal_start(journal, blocks, rsv_blocks, revoke_creds, 
> GFP_NOFS, type, line); // Calls memalloc_nofs_save() when start_this_handle() 
> returns 0.
>   }
> }
>   }
>   error = ext4_xattr_set_handle(handle, inode, name_index, name, value, 
> value_len, flags); {
> ext4_write_lock_xattr(inode, &no_expand); // Grabs &ei->xattr_sem
> error = ext4_xattr_ibody_set(handle, inode, &i, &is) {
>   error = ext4_xattr_set_entry(i, s, handle, inode, false /* is_block */) 
> {
> ret = ext4_xattr_inode_lookup_create(handle, inode, i->value, 
> i->value_len, &ea_inode); // Using GFP_KERNEL based on assumption that 
> ext4_journal_start() called memalloc_nofs_save().
>   }
> }
>   }
> }
> 
-- 
Jan Kara 
SUSE Labs, CR


Re: possible deadlock in start_this_handle (2)

2021-02-13 Thread Tetsuo Handa
On 2021/02/11 19:49, Jan Kara wrote:
> This stacktrace should never happen. ext4_xattr_set() starts a transaction.
> That internally goes through start_this_handle() which calls:
> 
>   handle->saved_alloc_context = memalloc_nofs_save();
> 
> and we restore the allocation context only in stop_this_handle() when
> stopping the handle. And with this fs_reclaim_acquire() should remove
> __GFP_FS from the mask and not call __fs_reclaim_acquire().

Excuse me, but it seems to me that nothing prevents ext4_xattr_set_handle() 
from reaching
ext4_xattr_inode_lookup_create() without memalloc_nofs_save() when hitting 
ext4_get_nojournal() path.
Will you explain when ext4_get_nojournal() path is executed?

ext4_xattr_set() {
  handle = ext4_journal_start(inode, EXT4_HT_XATTR, credits) == 
__ext4_journal_start() {
  return __ext4_journal_start_sb() {
journal = EXT4_SB(sb)->s_journal;
if (!journal || (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
  return ext4_get_nojournal(); // Never calls memalloc_nofs_save() 
despite returning !IS_ERR() value.
return jbd2__journal_start(journal, blocks, rsv_blocks, revoke_creds, 
GFP_NOFS, type, line); // Calls memalloc_nofs_save() when start_this_handle() 
returns 0.
  }
}
  }
  error = ext4_xattr_set_handle(handle, inode, name_index, name, value, 
value_len, flags); {
ext4_write_lock_xattr(inode, &no_expand); // Grabs &ei->xattr_sem
error = ext4_xattr_ibody_set(handle, inode, &i, &is) {
  error = ext4_xattr_set_entry(i, s, handle, inode, false /* is_block */) {
ret = ext4_xattr_inode_lookup_create(handle, inode, i->value, 
i->value_len, &ea_inode); // Using GFP_KERNEL based on assumption that 
ext4_journal_start() called memalloc_nofs_save().
  }
}
  }
}



Re: possible deadlock in start_this_handle (2)

2021-02-13 Thread Dmitry Vyukov
On Fri, Feb 12, 2021 at 4:43 PM Michal Hocko  wrote:
>
> On Fri 12-02-21 21:58:15, Tetsuo Handa wrote:
> > On 2021/02/12 21:30, Michal Hocko wrote:
> > > On Fri 12-02-21 12:22:07, Matthew Wilcox wrote:
> > >> On Fri, Feb 12, 2021 at 08:18:11PM +0900, Tetsuo Handa wrote:
> > >>> On 2021/02/12 1:41, Michal Hocko wrote:
> >  But I suspect we have drifted away from the original issue. I thought
> >  that a simple check would help us narrow down this particular case and
> >  somebody messing up from the IRQ context didn't sound like a completely
> >  off.
> > 
> > >>>
> > >>>  From my experience at 
> > >>> https://lkml.kernel.org/r/201409192053.ihj35462.jlomosoffvt...@i-love.sakura.ne.jp
> > >>>  ,
> > >>> I think we can replace direct PF_* manipulation with macros which do 
> > >>> not receive "struct task_struct *" argument.
> > >>> Since TASK_PFA_TEST()/TASK_PFA_SET()/TASK_PFA_CLEAR() are for 
> > >>> manipulating PFA_* flags on a remote thread, we can
> > >>> define similar ones for manipulating PF_* flags on current thread. 
> > >>> Then, auditing dangerous users becomes easier.
> > >>
> > >> No, nobody is manipulating another task's GFP flags.
> > >
> > > Agreed. And nobody should be manipulating PF flags on remote tasks
> > > either.
> > >
> >
> > No. You are misunderstanding. The bug report above is an example of
> > manipulating PF flags on remote tasks.
>
> The bug report you are referring to is ancient. And the cpuset code
> doesn't touch task->flags for a long time. I haven't checked exactly but
> it is years since regular and atomic flags have been separated unless I
> misremember.
>
> > You say "nobody should", but the reality is "there indeed was". There
> > might be unnoticed others. The point of this proposal is to make it
> > possible to "find such unnoticed users who are manipulating PF flags
> > on remote tasks".
>
> I am really confused what you are proposing here TBH and referring to an
> ancient bug doesn't really help. task->flags are _explicitly_ documented
> to be only used for _current_. Is it possible that somebody writes a
> buggy code? Sure, should we build a whole infrastructure around that to
> catch such a broken code? I am not really sure. One bug 6 years ago
> doesn't sound like a good reason for that.

Another similar one was just reported:

https://syzkaller.appspot.com/bug?extid=1b2c6989ec12e467d65c

WARNING: possible circular locking dependency detected
5.11.0-rc7-syzkaller #0 Not tainted
--
kswapd0/2232 is trying to acquire lock:
88801f552650 (sb_internal){.+.+}-{0:0}, at: evict+0x2ed/0x6b0 fs/inode.c:577

but task is already holding lock:
8be89240 (fs_reclaim){+.+.}-{0:0}, at:
__fs_reclaim_acquire+0x0/0x30 mm/page_alloc.c:5195

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #3 (fs_reclaim){+.+.}-{0:0}:
   __fs_reclaim_acquire mm/page_alloc.c:4326 [inline]
   fs_reclaim_acquire+0x117/0x150 mm/page_alloc.c:4340
   might_alloc include/linux/sched/mm.h:193 [inline]
   slab_pre_alloc_hook mm/slab.h:493 [inline]
   slab_alloc_node mm/slab.c:3221 [inline]
   kmem_cache_alloc_node_trace+0x48/0x520 mm/slab.c:3596
   __do_kmalloc_node mm/slab.c:3618 [inline]
   __kmalloc_node+0x38/0x60 mm/slab.c:3626
   kmalloc_node include/linux/slab.h:575 [inline]
   kvmalloc_node+0x61/0xf0 mm/util.c:587
   kvmalloc include/linux/mm.h:781 [inline]
   ext4_xattr_inode_cache_find fs/ext4/xattr.c:1465 [inline]
   ext4_xattr_inode_lookup_create fs/ext4/xattr.c:1508 [inline]
   ext4_xattr_set_entry+0x1ce6/0x3780 fs/ext4/xattr.c:1649
   ext4_xattr_ibody_set+0x78/0x2b0 fs/ext4/xattr.c:2224
   ext4_xattr_set_handle+0x8f4/0x13e0 fs/ext4/xattr.c:2380
   ext4_xattr_set+0x13a/0x340 fs/ext4/xattr.c:2493
   __vfs_setxattr+0x10e/0x170 fs/xattr.c:177
   __vfs_setxattr_noperm+0x11a/0x4c0 fs/xattr.c:208
   __vfs_setxattr_locked+0x1bf/0x250 fs/xattr.c:266
   vfs_setxattr+0x135/0x320 fs/xattr.c:291
   setxattr+0x1ff/0x290 fs/xattr.c:553
   path_setxattr+0x170/0x190 fs/xattr.c:572
   __do_sys_setxattr fs/xattr.c:587 [inline]
   __se_sys_setxattr fs/xattr.c:583 [inline]
   __x64_sys_setxattr+0xc0/0x160 fs/xattr.c:583
   do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46


Re: possible deadlock in start_this_handle (2)

2021-02-12 Thread Michal Hocko
On Fri 12-02-21 21:58:15, Tetsuo Handa wrote:
> On 2021/02/12 21:30, Michal Hocko wrote:
> > On Fri 12-02-21 12:22:07, Matthew Wilcox wrote:
> >> On Fri, Feb 12, 2021 at 08:18:11PM +0900, Tetsuo Handa wrote:
> >>> On 2021/02/12 1:41, Michal Hocko wrote:
>  But I suspect we have drifted away from the original issue. I thought
>  that a simple check would help us narrow down this particular case and
>  somebody messing up from the IRQ context didn't sound like a completely
>  off.
> 
> >>>
> >>>  From my experience at 
> >>> https://lkml.kernel.org/r/201409192053.ihj35462.jlomosoffvt...@i-love.sakura.ne.jp
> >>>  ,
> >>> I think we can replace direct PF_* manipulation with macros which do not 
> >>> receive "struct task_struct *" argument.
> >>> Since TASK_PFA_TEST()/TASK_PFA_SET()/TASK_PFA_CLEAR() are for 
> >>> manipulating PFA_* flags on a remote thread, we can
> >>> define similar ones for manipulating PF_* flags on current thread. Then, 
> >>> auditing dangerous users becomes easier.
> >>
> >> No, nobody is manipulating another task's GFP flags.
> > 
> > Agreed. And nobody should be manipulating PF flags on remote tasks
> > either.
> > 
> 
> No. You are misunderstanding. The bug report above is an example of
> manipulating PF flags on remote tasks.

The bug report you are referring to is ancient. And the cpuset code
doesn't touch task->flags for a long time. I haven't checked exactly but
it is years since regular and atomic flags have been separated unless I
misremember.

> You say "nobody should", but the reality is "there indeed was". There
> might be unnoticed others. The point of this proposal is to make it
> possible to "find such unnoticed users who are manipulating PF flags
> on remote tasks".

I am really confused what you are proposing here TBH and referring to an
ancient bug doesn't really help. task->flags are _explicitly_ documented
to be only used for _current_. Is it possible that somebody writes a
buggy code? Sure, should we build a whole infrastructure around that to
catch such a broken code? I am not really sure. One bug 6 years ago
doesn't sound like a good reason for that.

-- 
Michal Hocko
SUSE Labs


Re: possible deadlock in start_this_handle (2)

2021-02-12 Thread Tetsuo Handa
On 2021/02/12 22:12, Michal Hocko wrote:
> On Fri 12-02-21 21:58:15, Tetsuo Handa wrote:
>> On 2021/02/12 21:30, Michal Hocko wrote:
>>> On Fri 12-02-21 12:22:07, Matthew Wilcox wrote:
 On Fri, Feb 12, 2021 at 08:18:11PM +0900, Tetsuo Handa wrote:
> On 2021/02/12 1:41, Michal Hocko wrote:
>> But I suspect we have drifted away from the original issue. I thought
>> that a simple check would help us narrow down this particular case and
>> somebody messing up from the IRQ context didn't sound like a completely
>> off.
>>
>
>  From my experience at 
> https://lkml.kernel.org/r/201409192053.ihj35462.jlomosoffvt...@i-love.sakura.ne.jp
>  ,
> I think we can replace direct PF_* manipulation with macros which do not 
> receive "struct task_struct *" argument.
> Since TASK_PFA_TEST()/TASK_PFA_SET()/TASK_PFA_CLEAR() are for 
> manipulating PFA_* flags on a remote thread, we can
> define similar ones for manipulating PF_* flags on current thread. Then, 
> auditing dangerous users becomes easier.

 No, nobody is manipulating another task's GFP flags.
>>>
>>> Agreed. And nobody should be manipulating PF flags on remote tasks
>>> either.
>>>
>>
>> No. You are misunderstanding. The bug report above is an example of 
>> manipulating PF flags on remote tasks.
> 
> Could you be more specific? I do not remember there was any theory that
> somebody is manipulating flags on a remote task. A very vague theory was
> that an interrupt context might be doing that on the _current_ context
> but even that is not based on any real evidence. It is a pure
> speculation.
> 

Please read the link above. The report is an example of manipulating PF flags 
on a remote task.
You are thinking interrupt context as the only possible culprit, but you should 
also think
concurrent access as the other possible culprit.


Re: possible deadlock in start_this_handle (2)

2021-02-12 Thread Michal Hocko
On Fri 12-02-21 21:58:15, Tetsuo Handa wrote:
> On 2021/02/12 21:30, Michal Hocko wrote:
> > On Fri 12-02-21 12:22:07, Matthew Wilcox wrote:
> >> On Fri, Feb 12, 2021 at 08:18:11PM +0900, Tetsuo Handa wrote:
> >>> On 2021/02/12 1:41, Michal Hocko wrote:
>  But I suspect we have drifted away from the original issue. I thought
>  that a simple check would help us narrow down this particular case and
>  somebody messing up from the IRQ context didn't sound like a completely
>  off.
> 
> >>>
> >>>  From my experience at 
> >>> https://lkml.kernel.org/r/201409192053.ihj35462.jlomosoffvt...@i-love.sakura.ne.jp
> >>>  ,
> >>> I think we can replace direct PF_* manipulation with macros which do not 
> >>> receive "struct task_struct *" argument.
> >>> Since TASK_PFA_TEST()/TASK_PFA_SET()/TASK_PFA_CLEAR() are for 
> >>> manipulating PFA_* flags on a remote thread, we can
> >>> define similar ones for manipulating PF_* flags on current thread. Then, 
> >>> auditing dangerous users becomes easier.
> >>
> >> No, nobody is manipulating another task's GFP flags.
> > 
> > Agreed. And nobody should be manipulating PF flags on remote tasks
> > either.
> > 
> 
> No. You are misunderstanding. The bug report above is an example of 
> manipulating PF flags on remote tasks.

Could you be more specific? I do not remember there was any theory that
somebody is manipulating flags on a remote task. A very vague theory was
that an interrupt context might be doing that on the _current_ context
but even that is not based on any real evidence. It is a pure
speculation.
-- 
Michal Hocko
SUSE Labs


Re: possible deadlock in start_this_handle (2)

2021-02-12 Thread Tetsuo Handa
On 2021/02/12 21:30, Michal Hocko wrote:
> On Fri 12-02-21 12:22:07, Matthew Wilcox wrote:
>> On Fri, Feb 12, 2021 at 08:18:11PM +0900, Tetsuo Handa wrote:
>>> On 2021/02/12 1:41, Michal Hocko wrote:
 But I suspect we have drifted away from the original issue. I thought
 that a simple check would help us narrow down this particular case and
 somebody messing up from the IRQ context didn't sound like a completely
 off.

>>>
>>>  From my experience at 
>>> https://lkml.kernel.org/r/201409192053.ihj35462.jlomosoffvt...@i-love.sakura.ne.jp
>>>  ,
>>> I think we can replace direct PF_* manipulation with macros which do not 
>>> receive "struct task_struct *" argument.
>>> Since TASK_PFA_TEST()/TASK_PFA_SET()/TASK_PFA_CLEAR() are for manipulating 
>>> PFA_* flags on a remote thread, we can
>>> define similar ones for manipulating PF_* flags on current thread. Then, 
>>> auditing dangerous users becomes easier.
>>
>> No, nobody is manipulating another task's GFP flags.
> 
> Agreed. And nobody should be manipulating PF flags on remote tasks
> either.
> 

No. You are misunderstanding. The bug report above is an example of 
manipulating PF flags on remote tasks.
You say "nobody should", but the reality is "there indeed was". There might be 
unnoticed others. The point of
this proposal is to make it possible to "find such unnoticed users who are 
manipulating PF flags on remote tasks".


Re: possible deadlock in start_this_handle (2)

2021-02-12 Thread Michal Hocko
On Fri 12-02-21 12:22:07, Matthew Wilcox wrote:
> On Fri, Feb 12, 2021 at 08:18:11PM +0900, Tetsuo Handa wrote:
> > On 2021/02/12 1:41, Michal Hocko wrote:
> > > But I suspect we have drifted away from the original issue. I thought
> > > that a simple check would help us narrow down this particular case and
> > > somebody messing up from the IRQ context didn't sound like a completely
> > > off.
> > > 
> > 
> >  From my experience at 
> > https://lkml.kernel.org/r/201409192053.ihj35462.jlomosoffvt...@i-love.sakura.ne.jp
> >  ,
> > I think we can replace direct PF_* manipulation with macros which do not 
> > receive "struct task_struct *" argument.
> > Since TASK_PFA_TEST()/TASK_PFA_SET()/TASK_PFA_CLEAR() are for manipulating 
> > PFA_* flags on a remote thread, we can
> > define similar ones for manipulating PF_* flags on current thread. Then, 
> > auditing dangerous users becomes easier.
> 
> No, nobody is manipulating another task's GFP flags.

Agreed. And nobody should be manipulating PF flags on remote tasks
either.

-- 
Michal Hocko
SUSE Labs


Re: possible deadlock in start_this_handle (2)

2021-02-12 Thread Matthew Wilcox
On Fri, Feb 12, 2021 at 08:18:11PM +0900, Tetsuo Handa wrote:
> On 2021/02/12 1:41, Michal Hocko wrote:
> > But I suspect we have drifted away from the original issue. I thought
> > that a simple check would help us narrow down this particular case and
> > somebody messing up from the IRQ context didn't sound like a completely
> > off.
> > 
> 
>  From my experience at 
> https://lkml.kernel.org/r/201409192053.ihj35462.jlomosoffvt...@i-love.sakura.ne.jp
>  ,
> I think we can replace direct PF_* manipulation with macros which do not 
> receive "struct task_struct *" argument.
> Since TASK_PFA_TEST()/TASK_PFA_SET()/TASK_PFA_CLEAR() are for manipulating 
> PFA_* flags on a remote thread, we can
> define similar ones for manipulating PF_* flags on current thread. Then, 
> auditing dangerous users becomes easier.

No, nobody is manipulating another task's GFP flags.


Re: possible deadlock in start_this_handle (2)

2021-02-12 Thread Tetsuo Handa
On 2021/02/12 1:41, Michal Hocko wrote:
> But I suspect we have drifted away from the original issue. I thought
> that a simple check would help us narrow down this particular case and
> somebody messing up from the IRQ context didn't sound like a completely
> off.
> 

 From my experience at 
https://lkml.kernel.org/r/201409192053.ihj35462.jlomosoffvt...@i-love.sakura.ne.jp
 ,
I think we can replace direct PF_* manipulation with macros which do not 
receive "struct task_struct *" argument.
Since TASK_PFA_TEST()/TASK_PFA_SET()/TASK_PFA_CLEAR() are for manipulating 
PFA_* flags on a remote thread, we can
define similar ones for manipulating PF_* flags on current thread. Then, 
auditing dangerous users becomes easier.


Re: possible deadlock in start_this_handle (2)

2021-02-11 Thread Michal Hocko
On Thu 11-02-21 14:26:30, Matthew Wilcox wrote:
> On Thu, Feb 11, 2021 at 03:20:41PM +0100, Michal Hocko wrote:
> > On Thu 11-02-21 13:25:33, Matthew Wilcox wrote:
> > > On Thu, Feb 11, 2021 at 02:07:03PM +0100, Michal Hocko wrote:
> > > > On Thu 11-02-21 12:57:17, Matthew Wilcox wrote:
> > > > > > current->flags should be always manipulated from the user context. 
> > > > > > But
> > > > > > who knows maybe there is a bug and some interrupt handler is 
> > > > > > calling it.
> > > > > > This should be easy to catch no?
> > > > > 
> > > > > Why would it matter if it were?
> > > > 
> > > > I was thinking about a clobbered state because updates to ->flags are
> > > > not atomic because this shouldn't ever be updated concurrently. So maybe
> > > > a racing interrupt could corrupt the flags state?
> > > 
> > > I don't think that's possible.  Same-CPU races between interrupt and
> > > process context are simpler because the CPU always observes its own writes
> > > in order and the interrupt handler completes "between" two instructions.
> > 
> > I have to confess I haven't really thought the scenario through. My idea
> > was to simply add a simple check for an irq context into ->flags setting
> > routine because this should never be done in the first place. Not only
> > for scope gfp flags but any other PF_ flags IIRC.
> 
> That's not automatically clear to me.  There are plenty of places
> where an interrupt borrows the context of the task that it happens to
> have interrupted.  Specifically, interrupts should be using GFP_ATOMIC
> anyway, so this doesn't really make a lot of sense, but I don't think
> it's necessarily wrong for an interrupt to call a function that says
> "Definitely don't make GFP_FS allocations between these two points".

Not sure I got your point. IRQ context never does reclaim so anything
outside of NOWAIT/ATOMIC is pointless. But you might be referring to
future code where GFP_FS might have a meaning outside of the reclaim
context?

Anyway if we are to allow modifying PF_ flags from an interrupt context
then I believe we should make that code IRQ aware at least. I do not
feel really comfortable about async modifications when this is stated to
be safe doing in a non atomic way.

But I suspect we have drifted away from the original issue. I thought
that a simple check would help us narrow down this particular case and
somebody messing up from the IRQ context didn't sound like a completely
off.

-- 
Michal Hocko
SUSE Labs


Re: possible deadlock in start_this_handle (2)

2021-02-11 Thread Matthew Wilcox
On Thu, Feb 11, 2021 at 03:20:41PM +0100, Michal Hocko wrote:
> On Thu 11-02-21 13:25:33, Matthew Wilcox wrote:
> > On Thu, Feb 11, 2021 at 02:07:03PM +0100, Michal Hocko wrote:
> > > On Thu 11-02-21 12:57:17, Matthew Wilcox wrote:
> > > > > current->flags should be always manipulated from the user context. But
> > > > > who knows maybe there is a bug and some interrupt handler is calling 
> > > > > it.
> > > > > This should be easy to catch no?
> > > > 
> > > > Why would it matter if it were?
> > > 
> > > I was thinking about a clobbered state because updates to ->flags are
> > > not atomic because this shouldn't ever be updated concurrently. So maybe
> > > a racing interrupt could corrupt the flags state?
> > 
> > I don't think that's possible.  Same-CPU races between interrupt and
> > process context are simpler because the CPU always observes its own writes
> > in order and the interrupt handler completes "between" two instructions.
> 
> I have to confess I haven't really thought the scenario through. My idea
> was to simply add a simple check for an irq context into ->flags setting
> routine because this should never be done in the first place. Not only
> for scope gfp flags but any other PF_ flags IIRC.

That's not automatically clear to me.  There are plenty of places
where an interrupt borrows the context of the task that it happens to
have interrupted.  Specifically, interrupts should be using GFP_ATOMIC
anyway, so this doesn't really make a lot of sense, but I don't think
it's necessarily wrong for an interrupt to call a function that says
"Definitely don't make GFP_FS allocations between these two points".


Re: possible deadlock in start_this_handle (2)

2021-02-11 Thread Michal Hocko
On Thu 11-02-21 13:25:33, Matthew Wilcox wrote:
> On Thu, Feb 11, 2021 at 02:07:03PM +0100, Michal Hocko wrote:
> > On Thu 11-02-21 12:57:17, Matthew Wilcox wrote:
> > > > current->flags should be always manipulated from the user context. But
> > > > who knows maybe there is a bug and some interrupt handler is calling it.
> > > > This should be easy to catch no?
> > > 
> > > Why would it matter if it were?
> > 
> > I was thinking about a clobbered state because updates to ->flags are
> > not atomic because this shouldn't ever be updated concurrently. So maybe
> > a racing interrupt could corrupt the flags state?
> 
> I don't think that's possible.  Same-CPU races between interrupt and
> process context are simpler because the CPU always observes its own writes
> in order and the interrupt handler completes "between" two instructions.

I have to confess I haven't really thought the scenario through. My idea
was to simply add a simple check for an irq context into ->flags setting
routine because this should never be done in the first place. Not only
for scope gfp flags but any other PF_ flags IIRC.

-- 
Michal Hocko
SUSE Labs


Re: possible deadlock in start_this_handle (2)

2021-02-11 Thread Matthew Wilcox
On Thu, Feb 11, 2021 at 02:07:03PM +0100, Michal Hocko wrote:
> On Thu 11-02-21 12:57:17, Matthew Wilcox wrote:
> > > current->flags should be always manipulated from the user context. But
> > > who knows maybe there is a bug and some interrupt handler is calling it.
> > > This should be easy to catch no?
> > 
> > Why would it matter if it were?
> 
> I was thinking about a clobbered state because updates to ->flags are
> not atomic because this shouldn't ever be updated concurrently. So maybe
> a racing interrupt could corrupt the flags state?

I don't think that's possible.  Same-CPU races between interrupt and
process context are simpler because the CPU always observes its own writes
in order and the interrupt handler completes "between" two instructions.

eg a load-store CPU will do:

load 0 from address A
or 8 with result
store 8 to A

Two CPUs can do:

CPU 0   CPU 1
load 0 from A
load 0 from A
or 8 with 0
or 4 with 0
store 8 to A
store 4 to A

and the store of 8 is lost.

process interrupt
load 0 from A
load 0 from A
or 4 with 0
store 4 to A
or 8 with 0
store 8 to A

so the store of 4 would be lost.

but we expect the interrupt handler to restore it.  so we actually have this:

load 0 from A
load 0 from A
or 4 with 0
store 4 to A
load 4 from A
clear 4 from 4
store 0 to A
or 8 with 0
store 8 to A


If we have a leak where someone forgets to restore the nofs, that might
cause this.  We could check for the allocation mask bits being clear at
syscall exit (scheduling with these flags set is obviously ok).


Re: possible deadlock in start_this_handle (2)

2021-02-11 Thread Dmitry Vyukov
On Thu, Feb 11, 2021 at 1:57 PM Matthew Wilcox  wrote:
> > > > > > Hello,
> > > > > >
> > > > > > added mm guys to CC.
> > > > > >
> > > > > > On Wed 10-02-21 05:35:18, syzbot wrote:
> > > > > > > HEAD commit:1e0d27fc Merge branch 'akpm' (patches from Andrew)
> > > > > > > git tree:   upstream
> > > > > > > console output: 
> > > > > > > https://syzkaller.appspot.com/x/log.txt?x=15cbce90d0
> > > > > > > kernel config:  
> > > > > > > https://syzkaller.appspot.com/x/.config?x=bd1f72220b2e57eb
> > > > > > > dashboard link: 
> > > > > > > https://syzkaller.appspot.com/bug?extid=bfdded10ab7dcd7507ae
> > > > > > > userspace arch: i386
> > > > > > >
> > > > > > > Unfortunately, I don't have any reproducer for this issue yet.
> > > > > > >
> > > > > > > IMPORTANT: if you fix the issue, please add the following tag to 
> > > > > > > the commit:
> > > > > > > Reported-by: syzbot+bfdded10ab7dcd750...@syzkaller.appspotmail.com
> > > > > > >
> > > > > > > ==
> > > > > > > WARNING: possible circular locking dependency detected
> > > > > > > 5.11.0-rc6-syzkaller #0 Not tainted
> > > > > > > --
> > > > > > > kswapd0/2246 is trying to acquire lock:
> > > > > > > 888041a988e0 (jbd2_handle){}-{0:0}, at: 
> > > > > > > start_this_handle+0xf81/0x1380 fs/jbd2/transaction.c:444
> > > > > > >
> > > > > > > but task is already holding lock:
> > > > > > > 8be892c0 (fs_reclaim){+.+.}-{0:0}, at: 
> > > > > > > __fs_reclaim_acquire+0x0/0x30 mm/page_alloc.c:5195
> > > > > > >
> > > > > > > which lock already depends on the new lock.
> > > > > > >
> > > > > > > the existing dependency chain (in reverse order) is:
> > > > > > >
> > > > > > > -> #2 (fs_reclaim){+.+.}-{0:0}:
> > > > > > >__fs_reclaim_acquire mm/page_alloc.c:4326 [inline]
> > > > > > >fs_reclaim_acquire+0x117/0x150 mm/page_alloc.c:4340
> > > > > > >might_alloc include/linux/sched/mm.h:193 [inline]
> > > > > > >slab_pre_alloc_hook mm/slab.h:493 [inline]
> > > > > > >slab_alloc_node mm/slub.c:2817 [inline]
> > > > > > >__kmalloc_node+0x5f/0x430 mm/slub.c:4015
> > > > > > >kmalloc_node include/linux/slab.h:575 [inline]
> > > > > > >kvmalloc_node+0x61/0xf0 mm/util.c:587
> > > > > > >kvmalloc include/linux/mm.h:781 [inline]
> > > > > > >ext4_xattr_inode_cache_find fs/ext4/xattr.c:1465 [inline]
> > > > > > >ext4_xattr_inode_lookup_create fs/ext4/xattr.c:1508 
> > > > > > > [inline]
> > > > > > >ext4_xattr_set_entry+0x1ce6/0x3780 fs/ext4/xattr.c:1649
> > > > > > >ext4_xattr_ibody_set+0x78/0x2b0 fs/ext4/xattr.c:2224
> > > > > > >ext4_xattr_set_handle+0x8f4/0x13e0 fs/ext4/xattr.c:2380
> > > > > > >ext4_xattr_set+0x13a/0x340 fs/ext4/xattr.c:2493
> > > > > > >ext4_xattr_user_set+0xbc/0x100 fs/ext4/xattr_user.c:40
> > > > > > >__vfs_setxattr+0x10e/0x170 fs/xattr.c:177
> > > > > > >__vfs_setxattr_noperm+0x11a/0x4c0 fs/xattr.c:208
> > > > > > >__vfs_setxattr_locked+0x1bf/0x250 fs/xattr.c:266
> > > > > > >vfs_setxattr+0x135/0x320 fs/xattr.c:291
> > > > > > >setxattr+0x1ff/0x290 fs/xattr.c:553
> > > > > > >path_setxattr+0x170/0x190 fs/xattr.c:572
> > > > > > >__do_sys_setxattr fs/xattr.c:587 [inline]
> > > > > > >__se_sys_setxattr fs/xattr.c:583 [inline]
> > > > > > >__ia32_sys_setxattr+0xbc/0x150 fs/xattr.c:583
> > > > > > >do_syscall_32_irqs_on arch/x86/entry/common.c:77 [inline]
> > > > > > >__do_fast_syscall_32+0x56/0x80 arch/x86/entry/common.c:139
> > > > > > >do_fast_syscall_32+0x2f/0x70 arch/x86/entry/common.c:164
> > > > > > >entry_SYSENTER_compat_after_hwframe+0x4d/0x5c
> > > > > >
> > > > > > This stacktrace should never happen. ext4_xattr_set() starts a 
> > > > > > transaction.
> > > > > > That internally goes through start_this_handle() which calls:
> > > > > >
> > > > > > handle->saved_alloc_context = memalloc_nofs_save();
> > > > > >
> > > > > > and we restore the allocation context only in stop_this_handle() 
> > > > > > when
> > > > > > stopping the handle. And with this fs_reclaim_acquire() should 
> > > > > > remove
> > > > > > __GFP_FS from the mask and not call __fs_reclaim_acquire().
> > > > > >
> > > > > > Now I have no idea why something here didn't work out. Given we 
> > > > > > don't have
> > > > > > a reproducer it will be probably difficult to debug this. I'd note 
> > > > > > that
> > > > > > about year and half ago similar report happened (got autoclosed) so 
> > > > > > it may
> > > > > > be something real somewhere but it may also be just some HW glitch 
> > > > > > or
> > > > > > something like that.
> > > > >
> > > > > HW glitch is theoretically possible. But if we are considering such
> > > > > causes, I would say a kernel memory corruption is way more likely, we
> > > > > have hundreds 

Re: possible deadlock in start_this_handle (2)

2021-02-11 Thread Michal Hocko
On Thu 11-02-21 12:57:17, Matthew Wilcox wrote:
> On Thu, Feb 11, 2021 at 01:34:48PM +0100, Michal Hocko wrote:
> > On Thu 11-02-21 13:10:20, Jan Kara wrote:
> > > On Thu 11-02-21 12:28:48, Dmitry Vyukov wrote:
> > > > On Thu, Feb 11, 2021 at 12:22 PM Dmitry Vyukov  
> > > > wrote:
> > > > >
> > > > > On Thu, Feb 11, 2021 at 11:49 AM Jan Kara  wrote:
> > > > > >
> > > > > > Hello,
> > > > > >
> > > > > > added mm guys to CC.
> > > > > >
> > > > > > On Wed 10-02-21 05:35:18, syzbot wrote:
> > > > > > > HEAD commit:1e0d27fc Merge branch 'akpm' (patches from Andrew)
> > > > > > > git tree:   upstream
> > > > > > > console output: 
> > > > > > > https://syzkaller.appspot.com/x/log.txt?x=15cbce90d0
> > > > > > > kernel config:  
> > > > > > > https://syzkaller.appspot.com/x/.config?x=bd1f72220b2e57eb
> > > > > > > dashboard link: 
> > > > > > > https://syzkaller.appspot.com/bug?extid=bfdded10ab7dcd7507ae
> > > > > > > userspace arch: i386
> > > > > > >
> > > > > > > Unfortunately, I don't have any reproducer for this issue yet.
> > > > > > >
> > > > > > > IMPORTANT: if you fix the issue, please add the following tag to 
> > > > > > > the commit:
> > > > > > > Reported-by: syzbot+bfdded10ab7dcd750...@syzkaller.appspotmail.com
> > > > > > >
> > > > > > > ==
> > > > > > > WARNING: possible circular locking dependency detected
> > > > > > > 5.11.0-rc6-syzkaller #0 Not tainted
> > > > > > > --
> > > > > > > kswapd0/2246 is trying to acquire lock:
> > > > > > > 888041a988e0 (jbd2_handle){}-{0:0}, at: 
> > > > > > > start_this_handle+0xf81/0x1380 fs/jbd2/transaction.c:444
> > > > > > >
> > > > > > > but task is already holding lock:
> > > > > > > 8be892c0 (fs_reclaim){+.+.}-{0:0}, at: 
> > > > > > > __fs_reclaim_acquire+0x0/0x30 mm/page_alloc.c:5195
> > > > > > >
> > > > > > > which lock already depends on the new lock.
> > > > > > >
> > > > > > > the existing dependency chain (in reverse order) is:
> > > > > > >
> > > > > > > -> #2 (fs_reclaim){+.+.}-{0:0}:
> > > > > > >__fs_reclaim_acquire mm/page_alloc.c:4326 [inline]
> > > > > > >fs_reclaim_acquire+0x117/0x150 mm/page_alloc.c:4340
> > > > > > >might_alloc include/linux/sched/mm.h:193 [inline]
> > > > > > >slab_pre_alloc_hook mm/slab.h:493 [inline]
> > > > > > >slab_alloc_node mm/slub.c:2817 [inline]
> > > > > > >__kmalloc_node+0x5f/0x430 mm/slub.c:4015
> > > > > > >kmalloc_node include/linux/slab.h:575 [inline]
> > > > > > >kvmalloc_node+0x61/0xf0 mm/util.c:587
> > > > > > >kvmalloc include/linux/mm.h:781 [inline]
> > > > > > >ext4_xattr_inode_cache_find fs/ext4/xattr.c:1465 [inline]
> > > > > > >ext4_xattr_inode_lookup_create fs/ext4/xattr.c:1508 
> > > > > > > [inline]
> > > > > > >ext4_xattr_set_entry+0x1ce6/0x3780 fs/ext4/xattr.c:1649
> > > > > > >ext4_xattr_ibody_set+0x78/0x2b0 fs/ext4/xattr.c:2224
> > > > > > >ext4_xattr_set_handle+0x8f4/0x13e0 fs/ext4/xattr.c:2380
> > > > > > >ext4_xattr_set+0x13a/0x340 fs/ext4/xattr.c:2493
> > > > > > >ext4_xattr_user_set+0xbc/0x100 fs/ext4/xattr_user.c:40
> > > > > > >__vfs_setxattr+0x10e/0x170 fs/xattr.c:177
> > > > > > >__vfs_setxattr_noperm+0x11a/0x4c0 fs/xattr.c:208
> > > > > > >__vfs_setxattr_locked+0x1bf/0x250 fs/xattr.c:266
> > > > > > >vfs_setxattr+0x135/0x320 fs/xattr.c:291
> > > > > > >setxattr+0x1ff/0x290 fs/xattr.c:553
> > > > > > >path_setxattr+0x170/0x190 fs/xattr.c:572
> > > > > > >__do_sys_setxattr fs/xattr.c:587 [inline]
> > > > > > >__se_sys_setxattr fs/xattr.c:583 [inline]
> > > > > > >__ia32_sys_setxattr+0xbc/0x150 fs/xattr.c:583
> > > > > > >do_syscall_32_irqs_on arch/x86/entry/common.c:77 [inline]
> > > > > > >__do_fast_syscall_32+0x56/0x80 arch/x86/entry/common.c:139
> > > > > > >do_fast_syscall_32+0x2f/0x70 arch/x86/entry/common.c:164
> > > > > > >entry_SYSENTER_compat_after_hwframe+0x4d/0x5c
> > > > > >
> > > > > > This stacktrace should never happen. ext4_xattr_set() starts a 
> > > > > > transaction.
> > > > > > That internally goes through start_this_handle() which calls:
> > > > > >
> > > > > > handle->saved_alloc_context = memalloc_nofs_save();
> > > > > >
> > > > > > and we restore the allocation context only in stop_this_handle() 
> > > > > > when
> > > > > > stopping the handle. And with this fs_reclaim_acquire() should 
> > > > > > remove
> > > > > > __GFP_FS from the mask and not call __fs_reclaim_acquire().
> > > > > >
> > > > > > Now I have no idea why something here didn't work out. Given we 
> > > > > > don't have
> > > > > > a reproducer it will be probably difficult to debug this. I'd note 
> > > > > > that
> > > > > > about year and half ago similar report happened (got autoclosed) so 
> > > > > > it may
> > > > > 

Re: possible deadlock in start_this_handle (2)

2021-02-11 Thread Matthew Wilcox
On Thu, Feb 11, 2021 at 01:34:48PM +0100, Michal Hocko wrote:
> On Thu 11-02-21 13:10:20, Jan Kara wrote:
> > On Thu 11-02-21 12:28:48, Dmitry Vyukov wrote:
> > > On Thu, Feb 11, 2021 at 12:22 PM Dmitry Vyukov  wrote:
> > > >
> > > > On Thu, Feb 11, 2021 at 11:49 AM Jan Kara  wrote:
> > > > >
> > > > > Hello,
> > > > >
> > > > > added mm guys to CC.
> > > > >
> > > > > On Wed 10-02-21 05:35:18, syzbot wrote:
> > > > > > HEAD commit:1e0d27fc Merge branch 'akpm' (patches from Andrew)
> > > > > > git tree:   upstream
> > > > > > console output: 
> > > > > > https://syzkaller.appspot.com/x/log.txt?x=15cbce90d0
> > > > > > kernel config:  
> > > > > > https://syzkaller.appspot.com/x/.config?x=bd1f72220b2e57eb
> > > > > > dashboard link: 
> > > > > > https://syzkaller.appspot.com/bug?extid=bfdded10ab7dcd7507ae
> > > > > > userspace arch: i386
> > > > > >
> > > > > > Unfortunately, I don't have any reproducer for this issue yet.
> > > > > >
> > > > > > IMPORTANT: if you fix the issue, please add the following tag to 
> > > > > > the commit:
> > > > > > Reported-by: syzbot+bfdded10ab7dcd750...@syzkaller.appspotmail.com
> > > > > >
> > > > > > ==
> > > > > > WARNING: possible circular locking dependency detected
> > > > > > 5.11.0-rc6-syzkaller #0 Not tainted
> > > > > > --
> > > > > > kswapd0/2246 is trying to acquire lock:
> > > > > > 888041a988e0 (jbd2_handle){}-{0:0}, at: 
> > > > > > start_this_handle+0xf81/0x1380 fs/jbd2/transaction.c:444
> > > > > >
> > > > > > but task is already holding lock:
> > > > > > 8be892c0 (fs_reclaim){+.+.}-{0:0}, at: 
> > > > > > __fs_reclaim_acquire+0x0/0x30 mm/page_alloc.c:5195
> > > > > >
> > > > > > which lock already depends on the new lock.
> > > > > >
> > > > > > the existing dependency chain (in reverse order) is:
> > > > > >
> > > > > > -> #2 (fs_reclaim){+.+.}-{0:0}:
> > > > > >__fs_reclaim_acquire mm/page_alloc.c:4326 [inline]
> > > > > >fs_reclaim_acquire+0x117/0x150 mm/page_alloc.c:4340
> > > > > >might_alloc include/linux/sched/mm.h:193 [inline]
> > > > > >slab_pre_alloc_hook mm/slab.h:493 [inline]
> > > > > >slab_alloc_node mm/slub.c:2817 [inline]
> > > > > >__kmalloc_node+0x5f/0x430 mm/slub.c:4015
> > > > > >kmalloc_node include/linux/slab.h:575 [inline]
> > > > > >kvmalloc_node+0x61/0xf0 mm/util.c:587
> > > > > >kvmalloc include/linux/mm.h:781 [inline]
> > > > > >ext4_xattr_inode_cache_find fs/ext4/xattr.c:1465 [inline]
> > > > > >ext4_xattr_inode_lookup_create fs/ext4/xattr.c:1508 [inline]
> > > > > >ext4_xattr_set_entry+0x1ce6/0x3780 fs/ext4/xattr.c:1649
> > > > > >ext4_xattr_ibody_set+0x78/0x2b0 fs/ext4/xattr.c:2224
> > > > > >ext4_xattr_set_handle+0x8f4/0x13e0 fs/ext4/xattr.c:2380
> > > > > >ext4_xattr_set+0x13a/0x340 fs/ext4/xattr.c:2493
> > > > > >ext4_xattr_user_set+0xbc/0x100 fs/ext4/xattr_user.c:40
> > > > > >__vfs_setxattr+0x10e/0x170 fs/xattr.c:177
> > > > > >__vfs_setxattr_noperm+0x11a/0x4c0 fs/xattr.c:208
> > > > > >__vfs_setxattr_locked+0x1bf/0x250 fs/xattr.c:266
> > > > > >vfs_setxattr+0x135/0x320 fs/xattr.c:291
> > > > > >setxattr+0x1ff/0x290 fs/xattr.c:553
> > > > > >path_setxattr+0x170/0x190 fs/xattr.c:572
> > > > > >__do_sys_setxattr fs/xattr.c:587 [inline]
> > > > > >__se_sys_setxattr fs/xattr.c:583 [inline]
> > > > > >__ia32_sys_setxattr+0xbc/0x150 fs/xattr.c:583
> > > > > >do_syscall_32_irqs_on arch/x86/entry/common.c:77 [inline]
> > > > > >__do_fast_syscall_32+0x56/0x80 arch/x86/entry/common.c:139
> > > > > >do_fast_syscall_32+0x2f/0x70 arch/x86/entry/common.c:164
> > > > > >entry_SYSENTER_compat_after_hwframe+0x4d/0x5c
> > > > >
> > > > > This stacktrace should never happen. ext4_xattr_set() starts a 
> > > > > transaction.
> > > > > That internally goes through start_this_handle() which calls:
> > > > >
> > > > > handle->saved_alloc_context = memalloc_nofs_save();
> > > > >
> > > > > and we restore the allocation context only in stop_this_handle() when
> > > > > stopping the handle. And with this fs_reclaim_acquire() should remove
> > > > > __GFP_FS from the mask and not call __fs_reclaim_acquire().
> > > > >
> > > > > Now I have no idea why something here didn't work out. Given we don't 
> > > > > have
> > > > > a reproducer it will be probably difficult to debug this. I'd note 
> > > > > that
> > > > > about year and half ago similar report happened (got autoclosed) so 
> > > > > it may
> > > > > be something real somewhere but it may also be just some HW glitch or
> > > > > something like that.
> > > >
> > > > HW glitch is theoretically possible. But if we are considering such
> > > > causes, I would say a kernel memory corruption is way more likely, we
> > > > have 

Re: possible deadlock in start_this_handle (2)

2021-02-11 Thread Michal Hocko
On Thu 11-02-21 13:10:20, Jan Kara wrote:
> On Thu 11-02-21 12:28:48, Dmitry Vyukov wrote:
> > On Thu, Feb 11, 2021 at 12:22 PM Dmitry Vyukov  wrote:
> > >
> > > On Thu, Feb 11, 2021 at 11:49 AM Jan Kara  wrote:
> > > >
> > > > Hello,
> > > >
> > > > added mm guys to CC.
> > > >
> > > > On Wed 10-02-21 05:35:18, syzbot wrote:
> > > > > HEAD commit:1e0d27fc Merge branch 'akpm' (patches from Andrew)
> > > > > git tree:   upstream
> > > > > console output: 
> > > > > https://syzkaller.appspot.com/x/log.txt?x=15cbce90d0
> > > > > kernel config:  
> > > > > https://syzkaller.appspot.com/x/.config?x=bd1f72220b2e57eb
> > > > > dashboard link: 
> > > > > https://syzkaller.appspot.com/bug?extid=bfdded10ab7dcd7507ae
> > > > > userspace arch: i386
> > > > >
> > > > > Unfortunately, I don't have any reproducer for this issue yet.
> > > > >
> > > > > IMPORTANT: if you fix the issue, please add the following tag to the 
> > > > > commit:
> > > > > Reported-by: syzbot+bfdded10ab7dcd750...@syzkaller.appspotmail.com
> > > > >
> > > > > ==
> > > > > WARNING: possible circular locking dependency detected
> > > > > 5.11.0-rc6-syzkaller #0 Not tainted
> > > > > --
> > > > > kswapd0/2246 is trying to acquire lock:
> > > > > 888041a988e0 (jbd2_handle){}-{0:0}, at: 
> > > > > start_this_handle+0xf81/0x1380 fs/jbd2/transaction.c:444
> > > > >
> > > > > but task is already holding lock:
> > > > > 8be892c0 (fs_reclaim){+.+.}-{0:0}, at: 
> > > > > __fs_reclaim_acquire+0x0/0x30 mm/page_alloc.c:5195
> > > > >
> > > > > which lock already depends on the new lock.
> > > > >
> > > > > the existing dependency chain (in reverse order) is:
> > > > >
> > > > > -> #2 (fs_reclaim){+.+.}-{0:0}:
> > > > >__fs_reclaim_acquire mm/page_alloc.c:4326 [inline]
> > > > >fs_reclaim_acquire+0x117/0x150 mm/page_alloc.c:4340
> > > > >might_alloc include/linux/sched/mm.h:193 [inline]
> > > > >slab_pre_alloc_hook mm/slab.h:493 [inline]
> > > > >slab_alloc_node mm/slub.c:2817 [inline]
> > > > >__kmalloc_node+0x5f/0x430 mm/slub.c:4015
> > > > >kmalloc_node include/linux/slab.h:575 [inline]
> > > > >kvmalloc_node+0x61/0xf0 mm/util.c:587
> > > > >kvmalloc include/linux/mm.h:781 [inline]
> > > > >ext4_xattr_inode_cache_find fs/ext4/xattr.c:1465 [inline]
> > > > >ext4_xattr_inode_lookup_create fs/ext4/xattr.c:1508 [inline]
> > > > >ext4_xattr_set_entry+0x1ce6/0x3780 fs/ext4/xattr.c:1649
> > > > >ext4_xattr_ibody_set+0x78/0x2b0 fs/ext4/xattr.c:2224
> > > > >ext4_xattr_set_handle+0x8f4/0x13e0 fs/ext4/xattr.c:2380
> > > > >ext4_xattr_set+0x13a/0x340 fs/ext4/xattr.c:2493
> > > > >ext4_xattr_user_set+0xbc/0x100 fs/ext4/xattr_user.c:40
> > > > >__vfs_setxattr+0x10e/0x170 fs/xattr.c:177
> > > > >__vfs_setxattr_noperm+0x11a/0x4c0 fs/xattr.c:208
> > > > >__vfs_setxattr_locked+0x1bf/0x250 fs/xattr.c:266
> > > > >vfs_setxattr+0x135/0x320 fs/xattr.c:291
> > > > >setxattr+0x1ff/0x290 fs/xattr.c:553
> > > > >path_setxattr+0x170/0x190 fs/xattr.c:572
> > > > >__do_sys_setxattr fs/xattr.c:587 [inline]
> > > > >__se_sys_setxattr fs/xattr.c:583 [inline]
> > > > >__ia32_sys_setxattr+0xbc/0x150 fs/xattr.c:583
> > > > >do_syscall_32_irqs_on arch/x86/entry/common.c:77 [inline]
> > > > >__do_fast_syscall_32+0x56/0x80 arch/x86/entry/common.c:139
> > > > >do_fast_syscall_32+0x2f/0x70 arch/x86/entry/common.c:164
> > > > >entry_SYSENTER_compat_after_hwframe+0x4d/0x5c
> > > >
> > > > This stacktrace should never happen. ext4_xattr_set() starts a 
> > > > transaction.
> > > > That internally goes through start_this_handle() which calls:
> > > >
> > > > handle->saved_alloc_context = memalloc_nofs_save();
> > > >
> > > > and we restore the allocation context only in stop_this_handle() when
> > > > stopping the handle. And with this fs_reclaim_acquire() should remove
> > > > __GFP_FS from the mask and not call __fs_reclaim_acquire().
> > > >
> > > > Now I have no idea why something here didn't work out. Given we don't 
> > > > have
> > > > a reproducer it will be probably difficult to debug this. I'd note that
> > > > about year and half ago similar report happened (got autoclosed) so it 
> > > > may
> > > > be something real somewhere but it may also be just some HW glitch or
> > > > something like that.
> > >
> > > HW glitch is theoretically possible. But if we are considering such
> > > causes, I would say a kernel memory corruption is way more likely, we
> > > have hundreds of known memory-corruption-capable bugs open. In most
> > > cases they are caught by KASAN before doing silent damage. But KASAN
> > > can miss some cases.
> > >
> > > I see at least 4 existing bugs with similar stack:
> > > 

Re: possible deadlock in start_this_handle (2)

2021-02-11 Thread Jan Kara
On Thu 11-02-21 12:28:48, Dmitry Vyukov wrote:
> On Thu, Feb 11, 2021 at 12:22 PM Dmitry Vyukov  wrote:
> >
> > On Thu, Feb 11, 2021 at 11:49 AM Jan Kara  wrote:
> > >
> > > Hello,
> > >
> > > added mm guys to CC.
> > >
> > > On Wed 10-02-21 05:35:18, syzbot wrote:
> > > > HEAD commit:1e0d27fc Merge branch 'akpm' (patches from Andrew)
> > > > git tree:   upstream
> > > > console output: https://syzkaller.appspot.com/x/log.txt?x=15cbce90d0
> > > > kernel config:  
> > > > https://syzkaller.appspot.com/x/.config?x=bd1f72220b2e57eb
> > > > dashboard link: 
> > > > https://syzkaller.appspot.com/bug?extid=bfdded10ab7dcd7507ae
> > > > userspace arch: i386
> > > >
> > > > Unfortunately, I don't have any reproducer for this issue yet.
> > > >
> > > > IMPORTANT: if you fix the issue, please add the following tag to the 
> > > > commit:
> > > > Reported-by: syzbot+bfdded10ab7dcd750...@syzkaller.appspotmail.com
> > > >
> > > > ==
> > > > WARNING: possible circular locking dependency detected
> > > > 5.11.0-rc6-syzkaller #0 Not tainted
> > > > --
> > > > kswapd0/2246 is trying to acquire lock:
> > > > 888041a988e0 (jbd2_handle){}-{0:0}, at: 
> > > > start_this_handle+0xf81/0x1380 fs/jbd2/transaction.c:444
> > > >
> > > > but task is already holding lock:
> > > > 8be892c0 (fs_reclaim){+.+.}-{0:0}, at: 
> > > > __fs_reclaim_acquire+0x0/0x30 mm/page_alloc.c:5195
> > > >
> > > > which lock already depends on the new lock.
> > > >
> > > > the existing dependency chain (in reverse order) is:
> > > >
> > > > -> #2 (fs_reclaim){+.+.}-{0:0}:
> > > >__fs_reclaim_acquire mm/page_alloc.c:4326 [inline]
> > > >fs_reclaim_acquire+0x117/0x150 mm/page_alloc.c:4340
> > > >might_alloc include/linux/sched/mm.h:193 [inline]
> > > >slab_pre_alloc_hook mm/slab.h:493 [inline]
> > > >slab_alloc_node mm/slub.c:2817 [inline]
> > > >__kmalloc_node+0x5f/0x430 mm/slub.c:4015
> > > >kmalloc_node include/linux/slab.h:575 [inline]
> > > >kvmalloc_node+0x61/0xf0 mm/util.c:587
> > > >kvmalloc include/linux/mm.h:781 [inline]
> > > >ext4_xattr_inode_cache_find fs/ext4/xattr.c:1465 [inline]
> > > >ext4_xattr_inode_lookup_create fs/ext4/xattr.c:1508 [inline]
> > > >ext4_xattr_set_entry+0x1ce6/0x3780 fs/ext4/xattr.c:1649
> > > >ext4_xattr_ibody_set+0x78/0x2b0 fs/ext4/xattr.c:2224
> > > >ext4_xattr_set_handle+0x8f4/0x13e0 fs/ext4/xattr.c:2380
> > > >ext4_xattr_set+0x13a/0x340 fs/ext4/xattr.c:2493
> > > >ext4_xattr_user_set+0xbc/0x100 fs/ext4/xattr_user.c:40
> > > >__vfs_setxattr+0x10e/0x170 fs/xattr.c:177
> > > >__vfs_setxattr_noperm+0x11a/0x4c0 fs/xattr.c:208
> > > >__vfs_setxattr_locked+0x1bf/0x250 fs/xattr.c:266
> > > >vfs_setxattr+0x135/0x320 fs/xattr.c:291
> > > >setxattr+0x1ff/0x290 fs/xattr.c:553
> > > >path_setxattr+0x170/0x190 fs/xattr.c:572
> > > >__do_sys_setxattr fs/xattr.c:587 [inline]
> > > >__se_sys_setxattr fs/xattr.c:583 [inline]
> > > >__ia32_sys_setxattr+0xbc/0x150 fs/xattr.c:583
> > > >do_syscall_32_irqs_on arch/x86/entry/common.c:77 [inline]
> > > >__do_fast_syscall_32+0x56/0x80 arch/x86/entry/common.c:139
> > > >do_fast_syscall_32+0x2f/0x70 arch/x86/entry/common.c:164
> > > >entry_SYSENTER_compat_after_hwframe+0x4d/0x5c
> > >
> > > This stacktrace should never happen. ext4_xattr_set() starts a 
> > > transaction.
> > > That internally goes through start_this_handle() which calls:
> > >
> > > handle->saved_alloc_context = memalloc_nofs_save();
> > >
> > > and we restore the allocation context only in stop_this_handle() when
> > > stopping the handle. And with this fs_reclaim_acquire() should remove
> > > __GFP_FS from the mask and not call __fs_reclaim_acquire().
> > >
> > > Now I have no idea why something here didn't work out. Given we don't have
> > > a reproducer it will be probably difficult to debug this. I'd note that
> > > about year and half ago similar report happened (got autoclosed) so it may
> > > be something real somewhere but it may also be just some HW glitch or
> > > something like that.
> >
> > HW glitch is theoretically possible. But if we are considering such
> > causes, I would say a kernel memory corruption is way more likely, we
> > have hundreds of known memory-corruption-capable bugs open. In most
> > cases they are caught by KASAN before doing silent damage. But KASAN
> > can miss some cases.
> >
> > I see at least 4 existing bugs with similar stack:
> > https://syzkaller.appspot.com/bug?extid=bfdded10ab7dcd7507ae
> > https://syzkaller.appspot.com/bug?extid=a7ab8df042baaf42ae3c
> > https://syzkaller.appspot.com/bug?id=c814a55a728493959328551c769ede4c8ff72aab
> > https://syzkaller.appspot.com/bug?id=426ad9adca053dafcd698f3a48ad5406dccc972b
> >
> 

Re: possible deadlock in start_this_handle (2)

2021-02-11 Thread Jan Kara
On Thu 11-02-21 12:22:39, Dmitry Vyukov wrote:
> On Thu, Feb 11, 2021 at 11:49 AM Jan Kara  wrote:
> >
> > Hello,
> >
> > added mm guys to CC.
> >
> > On Wed 10-02-21 05:35:18, syzbot wrote:
> > > HEAD commit:1e0d27fc Merge branch 'akpm' (patches from Andrew)
> > > git tree:   upstream
> > > console output: https://syzkaller.appspot.com/x/log.txt?x=15cbce90d0
> > > kernel config:  https://syzkaller.appspot.com/x/.config?x=bd1f72220b2e57eb
> > > dashboard link: 
> > > https://syzkaller.appspot.com/bug?extid=bfdded10ab7dcd7507ae
> > > userspace arch: i386
> > >
> > > Unfortunately, I don't have any reproducer for this issue yet.
> > >
> > > IMPORTANT: if you fix the issue, please add the following tag to the 
> > > commit:
> > > Reported-by: syzbot+bfdded10ab7dcd750...@syzkaller.appspotmail.com
> > >
> > > ==
> > > WARNING: possible circular locking dependency detected
> > > 5.11.0-rc6-syzkaller #0 Not tainted
> > > --
> > > kswapd0/2246 is trying to acquire lock:
> > > 888041a988e0 (jbd2_handle){}-{0:0}, at: 
> > > start_this_handle+0xf81/0x1380 fs/jbd2/transaction.c:444
> > >
> > > but task is already holding lock:
> > > 8be892c0 (fs_reclaim){+.+.}-{0:0}, at: 
> > > __fs_reclaim_acquire+0x0/0x30 mm/page_alloc.c:5195
> > >
> > > which lock already depends on the new lock.
> > >
> > > the existing dependency chain (in reverse order) is:
> > >
> > > -> #2 (fs_reclaim){+.+.}-{0:0}:
> > >__fs_reclaim_acquire mm/page_alloc.c:4326 [inline]
> > >fs_reclaim_acquire+0x117/0x150 mm/page_alloc.c:4340
> > >might_alloc include/linux/sched/mm.h:193 [inline]
> > >slab_pre_alloc_hook mm/slab.h:493 [inline]
> > >slab_alloc_node mm/slub.c:2817 [inline]
> > >__kmalloc_node+0x5f/0x430 mm/slub.c:4015
> > >kmalloc_node include/linux/slab.h:575 [inline]
> > >kvmalloc_node+0x61/0xf0 mm/util.c:587
> > >kvmalloc include/linux/mm.h:781 [inline]
> > >ext4_xattr_inode_cache_find fs/ext4/xattr.c:1465 [inline]
> > >ext4_xattr_inode_lookup_create fs/ext4/xattr.c:1508 [inline]
> > >ext4_xattr_set_entry+0x1ce6/0x3780 fs/ext4/xattr.c:1649
> > >ext4_xattr_ibody_set+0x78/0x2b0 fs/ext4/xattr.c:2224
> > >ext4_xattr_set_handle+0x8f4/0x13e0 fs/ext4/xattr.c:2380
> > >ext4_xattr_set+0x13a/0x340 fs/ext4/xattr.c:2493
> > >ext4_xattr_user_set+0xbc/0x100 fs/ext4/xattr_user.c:40
> > >__vfs_setxattr+0x10e/0x170 fs/xattr.c:177
> > >__vfs_setxattr_noperm+0x11a/0x4c0 fs/xattr.c:208
> > >__vfs_setxattr_locked+0x1bf/0x250 fs/xattr.c:266
> > >vfs_setxattr+0x135/0x320 fs/xattr.c:291
> > >setxattr+0x1ff/0x290 fs/xattr.c:553
> > >path_setxattr+0x170/0x190 fs/xattr.c:572
> > >__do_sys_setxattr fs/xattr.c:587 [inline]
> > >__se_sys_setxattr fs/xattr.c:583 [inline]
> > >__ia32_sys_setxattr+0xbc/0x150 fs/xattr.c:583
> > >do_syscall_32_irqs_on arch/x86/entry/common.c:77 [inline]
> > >__do_fast_syscall_32+0x56/0x80 arch/x86/entry/common.c:139
> > >do_fast_syscall_32+0x2f/0x70 arch/x86/entry/common.c:164
> > >entry_SYSENTER_compat_after_hwframe+0x4d/0x5c
> >
> > This stacktrace should never happen. ext4_xattr_set() starts a transaction.
> > That internally goes through start_this_handle() which calls:
> >
> > handle->saved_alloc_context = memalloc_nofs_save();
> >
> > and we restore the allocation context only in stop_this_handle() when
> > stopping the handle. And with this fs_reclaim_acquire() should remove
> > __GFP_FS from the mask and not call __fs_reclaim_acquire().
> >
> > Now I have no idea why something here didn't work out. Given we don't have
> > a reproducer it will be probably difficult to debug this. I'd note that
> > about year and half ago similar report happened (got autoclosed) so it may
> > be something real somewhere but it may also be just some HW glitch or
> > something like that.
> 
> HW glitch is theoretically possible. But if we are considering such
> causes, I would say a kernel memory corruption is way more likely, we
> have hundreds of known memory-corruption-capable bugs open. In most
> cases they are caught by KASAN before doing silent damage. But KASAN
> can miss some cases.
> 
> I see at least 4 existing bugs with similar stack:
> https://syzkaller.appspot.com/bug?extid=bfdded10ab7dcd7507ae
> https://syzkaller.appspot.com/bug?extid=a7ab8df042baaf42ae3c
> https://syzkaller.appspot.com/bug?id=c814a55a728493959328551c769ede4c8ff72aab
> https://syzkaller.appspot.com/bug?id=426ad9adca053dafcd698f3a48ad5406dccc972b

The last one looks different and likely unrelated (I don't see the scoping API
being used anywhere in that subsystem) but the others do indeed look valid. So
I agree it seems to be some very hard to hit problem and likely not just a
random corruption.

   

Re: possible deadlock in start_this_handle (2)

2021-02-11 Thread Dmitry Vyukov
On Thu, Feb 11, 2021 at 12:22 PM Dmitry Vyukov  wrote:
>
> On Thu, Feb 11, 2021 at 11:49 AM Jan Kara  wrote:
> >
> > Hello,
> >
> > added mm guys to CC.
> >
> > On Wed 10-02-21 05:35:18, syzbot wrote:
> > > HEAD commit:1e0d27fc Merge branch 'akpm' (patches from Andrew)
> > > git tree:   upstream
> > > console output: https://syzkaller.appspot.com/x/log.txt?x=15cbce90d0
> > > kernel config:  https://syzkaller.appspot.com/x/.config?x=bd1f72220b2e57eb
> > > dashboard link: 
> > > https://syzkaller.appspot.com/bug?extid=bfdded10ab7dcd7507ae
> > > userspace arch: i386
> > >
> > > Unfortunately, I don't have any reproducer for this issue yet.
> > >
> > > IMPORTANT: if you fix the issue, please add the following tag to the 
> > > commit:
> > > Reported-by: syzbot+bfdded10ab7dcd750...@syzkaller.appspotmail.com
> > >
> > > ==
> > > WARNING: possible circular locking dependency detected
> > > 5.11.0-rc6-syzkaller #0 Not tainted
> > > --
> > > kswapd0/2246 is trying to acquire lock:
> > > 888041a988e0 (jbd2_handle){}-{0:0}, at: 
> > > start_this_handle+0xf81/0x1380 fs/jbd2/transaction.c:444
> > >
> > > but task is already holding lock:
> > > 8be892c0 (fs_reclaim){+.+.}-{0:0}, at: 
> > > __fs_reclaim_acquire+0x0/0x30 mm/page_alloc.c:5195
> > >
> > > which lock already depends on the new lock.
> > >
> > > the existing dependency chain (in reverse order) is:
> > >
> > > -> #2 (fs_reclaim){+.+.}-{0:0}:
> > >__fs_reclaim_acquire mm/page_alloc.c:4326 [inline]
> > >fs_reclaim_acquire+0x117/0x150 mm/page_alloc.c:4340
> > >might_alloc include/linux/sched/mm.h:193 [inline]
> > >slab_pre_alloc_hook mm/slab.h:493 [inline]
> > >slab_alloc_node mm/slub.c:2817 [inline]
> > >__kmalloc_node+0x5f/0x430 mm/slub.c:4015
> > >kmalloc_node include/linux/slab.h:575 [inline]
> > >kvmalloc_node+0x61/0xf0 mm/util.c:587
> > >kvmalloc include/linux/mm.h:781 [inline]
> > >ext4_xattr_inode_cache_find fs/ext4/xattr.c:1465 [inline]
> > >ext4_xattr_inode_lookup_create fs/ext4/xattr.c:1508 [inline]
> > >ext4_xattr_set_entry+0x1ce6/0x3780 fs/ext4/xattr.c:1649
> > >ext4_xattr_ibody_set+0x78/0x2b0 fs/ext4/xattr.c:2224
> > >ext4_xattr_set_handle+0x8f4/0x13e0 fs/ext4/xattr.c:2380
> > >ext4_xattr_set+0x13a/0x340 fs/ext4/xattr.c:2493
> > >ext4_xattr_user_set+0xbc/0x100 fs/ext4/xattr_user.c:40
> > >__vfs_setxattr+0x10e/0x170 fs/xattr.c:177
> > >__vfs_setxattr_noperm+0x11a/0x4c0 fs/xattr.c:208
> > >__vfs_setxattr_locked+0x1bf/0x250 fs/xattr.c:266
> > >vfs_setxattr+0x135/0x320 fs/xattr.c:291
> > >setxattr+0x1ff/0x290 fs/xattr.c:553
> > >path_setxattr+0x170/0x190 fs/xattr.c:572
> > >__do_sys_setxattr fs/xattr.c:587 [inline]
> > >__se_sys_setxattr fs/xattr.c:583 [inline]
> > >__ia32_sys_setxattr+0xbc/0x150 fs/xattr.c:583
> > >do_syscall_32_irqs_on arch/x86/entry/common.c:77 [inline]
> > >__do_fast_syscall_32+0x56/0x80 arch/x86/entry/common.c:139
> > >do_fast_syscall_32+0x2f/0x70 arch/x86/entry/common.c:164
> > >entry_SYSENTER_compat_after_hwframe+0x4d/0x5c
> >
> > This stacktrace should never happen. ext4_xattr_set() starts a transaction.
> > That internally goes through start_this_handle() which calls:
> >
> > handle->saved_alloc_context = memalloc_nofs_save();
> >
> > and we restore the allocation context only in stop_this_handle() when
> > stopping the handle. And with this fs_reclaim_acquire() should remove
> > __GFP_FS from the mask and not call __fs_reclaim_acquire().
> >
> > Now I have no idea why something here didn't work out. Given we don't have
> > a reproducer it will be probably difficult to debug this. I'd note that
> > about year and half ago similar report happened (got autoclosed) so it may
> > be something real somewhere but it may also be just some HW glitch or
> > something like that.
>
> HW glitch is theoretically possible. But if we are considering such
> causes, I would say a kernel memory corruption is way more likely, we
> have hundreds of known memory-corruption-capable bugs open. In most
> cases they are caught by KASAN before doing silent damage. But KASAN
> can miss some cases.
>
> I see at least 4 existing bugs with similar stack:
> https://syzkaller.appspot.com/bug?extid=bfdded10ab7dcd7507ae
> https://syzkaller.appspot.com/bug?extid=a7ab8df042baaf42ae3c
> https://syzkaller.appspot.com/bug?id=c814a55a728493959328551c769ede4c8ff72aab
> https://syzkaller.appspot.com/bug?id=426ad9adca053dafcd698f3a48ad5406dccc972b
>
> All in all, I would not assume it's a memory corruption. When we had
> bugs that actually caused silent memory corruption, that caused a
> spike of random one-time crashes all over the kernel. This does not
> look like it.

I wonder if memalloc_nofs_save 

Re: possible deadlock in start_this_handle (2)

2021-02-11 Thread Dmitry Vyukov
On Thu, Feb 11, 2021 at 11:49 AM Jan Kara  wrote:
>
> Hello,
>
> added mm guys to CC.
>
> On Wed 10-02-21 05:35:18, syzbot wrote:
> > HEAD commit:1e0d27fc Merge branch 'akpm' (patches from Andrew)
> > git tree:   upstream
> > console output: https://syzkaller.appspot.com/x/log.txt?x=15cbce90d0
> > kernel config:  https://syzkaller.appspot.com/x/.config?x=bd1f72220b2e57eb
> > dashboard link: https://syzkaller.appspot.com/bug?extid=bfdded10ab7dcd7507ae
> > userspace arch: i386
> >
> > Unfortunately, I don't have any reproducer for this issue yet.
> >
> > IMPORTANT: if you fix the issue, please add the following tag to the commit:
> > Reported-by: syzbot+bfdded10ab7dcd750...@syzkaller.appspotmail.com
> >
> > ==
> > WARNING: possible circular locking dependency detected
> > 5.11.0-rc6-syzkaller #0 Not tainted
> > --
> > kswapd0/2246 is trying to acquire lock:
> > 888041a988e0 (jbd2_handle){}-{0:0}, at: 
> > start_this_handle+0xf81/0x1380 fs/jbd2/transaction.c:444
> >
> > but task is already holding lock:
> > 8be892c0 (fs_reclaim){+.+.}-{0:0}, at: 
> > __fs_reclaim_acquire+0x0/0x30 mm/page_alloc.c:5195
> >
> > which lock already depends on the new lock.
> >
> > the existing dependency chain (in reverse order) is:
> >
> > -> #2 (fs_reclaim){+.+.}-{0:0}:
> >__fs_reclaim_acquire mm/page_alloc.c:4326 [inline]
> >fs_reclaim_acquire+0x117/0x150 mm/page_alloc.c:4340
> >might_alloc include/linux/sched/mm.h:193 [inline]
> >slab_pre_alloc_hook mm/slab.h:493 [inline]
> >slab_alloc_node mm/slub.c:2817 [inline]
> >__kmalloc_node+0x5f/0x430 mm/slub.c:4015
> >kmalloc_node include/linux/slab.h:575 [inline]
> >kvmalloc_node+0x61/0xf0 mm/util.c:587
> >kvmalloc include/linux/mm.h:781 [inline]
> >ext4_xattr_inode_cache_find fs/ext4/xattr.c:1465 [inline]
> >ext4_xattr_inode_lookup_create fs/ext4/xattr.c:1508 [inline]
> >ext4_xattr_set_entry+0x1ce6/0x3780 fs/ext4/xattr.c:1649
> >ext4_xattr_ibody_set+0x78/0x2b0 fs/ext4/xattr.c:2224
> >ext4_xattr_set_handle+0x8f4/0x13e0 fs/ext4/xattr.c:2380
> >ext4_xattr_set+0x13a/0x340 fs/ext4/xattr.c:2493
> >ext4_xattr_user_set+0xbc/0x100 fs/ext4/xattr_user.c:40
> >__vfs_setxattr+0x10e/0x170 fs/xattr.c:177
> >__vfs_setxattr_noperm+0x11a/0x4c0 fs/xattr.c:208
> >__vfs_setxattr_locked+0x1bf/0x250 fs/xattr.c:266
> >vfs_setxattr+0x135/0x320 fs/xattr.c:291
> >setxattr+0x1ff/0x290 fs/xattr.c:553
> >path_setxattr+0x170/0x190 fs/xattr.c:572
> >__do_sys_setxattr fs/xattr.c:587 [inline]
> >__se_sys_setxattr fs/xattr.c:583 [inline]
> >__ia32_sys_setxattr+0xbc/0x150 fs/xattr.c:583
> >do_syscall_32_irqs_on arch/x86/entry/common.c:77 [inline]
> >__do_fast_syscall_32+0x56/0x80 arch/x86/entry/common.c:139
> >do_fast_syscall_32+0x2f/0x70 arch/x86/entry/common.c:164
> >entry_SYSENTER_compat_after_hwframe+0x4d/0x5c
>
> This stacktrace should never happen. ext4_xattr_set() starts a transaction.
> That internally goes through start_this_handle() which calls:
>
> handle->saved_alloc_context = memalloc_nofs_save();
>
> and we restore the allocation context only in stop_this_handle() when
> stopping the handle. And with this fs_reclaim_acquire() should remove
> __GFP_FS from the mask and not call __fs_reclaim_acquire().
>
> Now I have no idea why something here didn't work out. Given we don't have
> a reproducer it will be probably difficult to debug this. I'd note that
> about year and half ago similar report happened (got autoclosed) so it may
> be something real somewhere but it may also be just some HW glitch or
> something like that.

HW glitch is theoretically possible. But if we are considering such
causes, I would say a kernel memory corruption is way more likely, we
have hundreds of known memory-corruption-capable bugs open. In most
cases they are caught by KASAN before doing silent damage. But KASAN
can miss some cases.

I see at least 4 existing bugs with similar stack:
https://syzkaller.appspot.com/bug?extid=bfdded10ab7dcd7507ae
https://syzkaller.appspot.com/bug?extid=a7ab8df042baaf42ae3c
https://syzkaller.appspot.com/bug?id=c814a55a728493959328551c769ede4c8ff72aab
https://syzkaller.appspot.com/bug?id=426ad9adca053dafcd698f3a48ad5406dccc972b

All in all, I would not assume it's a memory corruption. When we had
bugs that actually caused silent memory corruption, that caused a
spike of random one-time crashes all over the kernel. This does not
look like it.


Re: possible deadlock in start_this_handle (2)

2021-02-11 Thread Michal Hocko
On Thu 11-02-21 11:49:47, Jan Kara wrote:
> Hello,
> 
> added mm guys to CC.
> 
> On Wed 10-02-21 05:35:18, syzbot wrote:
> > HEAD commit:1e0d27fc Merge branch 'akpm' (patches from Andrew)
> > git tree:   upstream
> > console output: https://syzkaller.appspot.com/x/log.txt?x=15cbce90d0
> > kernel config:  https://syzkaller.appspot.com/x/.config?x=bd1f72220b2e57eb
> > dashboard link: https://syzkaller.appspot.com/bug?extid=bfdded10ab7dcd7507ae
> > userspace arch: i386
> > 
> > Unfortunately, I don't have any reproducer for this issue yet.
> > 
> > IMPORTANT: if you fix the issue, please add the following tag to the commit:
> > Reported-by: syzbot+bfdded10ab7dcd750...@syzkaller.appspotmail.com
> > 
> > ==
> > WARNING: possible circular locking dependency detected
> > 5.11.0-rc6-syzkaller #0 Not tainted
> > --
> > kswapd0/2246 is trying to acquire lock:
> > 888041a988e0 (jbd2_handle){}-{0:0}, at: 
> > start_this_handle+0xf81/0x1380 fs/jbd2/transaction.c:444
> > 
> > but task is already holding lock:
> > 8be892c0 (fs_reclaim){+.+.}-{0:0}, at: 
> > __fs_reclaim_acquire+0x0/0x30 mm/page_alloc.c:5195
> > 
> > which lock already depends on the new lock.
> > 
> > the existing dependency chain (in reverse order) is:
> > 
> > -> #2 (fs_reclaim){+.+.}-{0:0}:
> >__fs_reclaim_acquire mm/page_alloc.c:4326 [inline]
> >fs_reclaim_acquire+0x117/0x150 mm/page_alloc.c:4340
> >might_alloc include/linux/sched/mm.h:193 [inline]
> >slab_pre_alloc_hook mm/slab.h:493 [inline]
> >slab_alloc_node mm/slub.c:2817 [inline]
> >__kmalloc_node+0x5f/0x430 mm/slub.c:4015
> >kmalloc_node include/linux/slab.h:575 [inline]
> >kvmalloc_node+0x61/0xf0 mm/util.c:587
> >kvmalloc include/linux/mm.h:781 [inline]
> >ext4_xattr_inode_cache_find fs/ext4/xattr.c:1465 [inline]
> >ext4_xattr_inode_lookup_create fs/ext4/xattr.c:1508 [inline]
> >ext4_xattr_set_entry+0x1ce6/0x3780 fs/ext4/xattr.c:1649
> >ext4_xattr_ibody_set+0x78/0x2b0 fs/ext4/xattr.c:2224
> >ext4_xattr_set_handle+0x8f4/0x13e0 fs/ext4/xattr.c:2380
> >ext4_xattr_set+0x13a/0x340 fs/ext4/xattr.c:2493
> >ext4_xattr_user_set+0xbc/0x100 fs/ext4/xattr_user.c:40
> >__vfs_setxattr+0x10e/0x170 fs/xattr.c:177
> >__vfs_setxattr_noperm+0x11a/0x4c0 fs/xattr.c:208
> >__vfs_setxattr_locked+0x1bf/0x250 fs/xattr.c:266
> >vfs_setxattr+0x135/0x320 fs/xattr.c:291
> >setxattr+0x1ff/0x290 fs/xattr.c:553
> >path_setxattr+0x170/0x190 fs/xattr.c:572
> >__do_sys_setxattr fs/xattr.c:587 [inline]
> >__se_sys_setxattr fs/xattr.c:583 [inline]
> >__ia32_sys_setxattr+0xbc/0x150 fs/xattr.c:583
> >do_syscall_32_irqs_on arch/x86/entry/common.c:77 [inline]
> >__do_fast_syscall_32+0x56/0x80 arch/x86/entry/common.c:139
> >do_fast_syscall_32+0x2f/0x70 arch/x86/entry/common.c:164
> >entry_SYSENTER_compat_after_hwframe+0x4d/0x5c
> 
> This stacktrace should never happen. ext4_xattr_set() starts a transaction.
> That internally goes through start_this_handle() which calls:
> 
>   handle->saved_alloc_context = memalloc_nofs_save();
> 
> and we restore the allocation context only in stop_this_handle() when
> stopping the handle. And with this fs_reclaim_acquire() should remove
> __GFP_FS from the mask and not call __fs_reclaim_acquire().
> 
> Now I have no idea why something here didn't work out. Given we don't have
> a reproducer it will be probably difficult to debug this. I'd note that
> about year and half ago similar report happened (got autoclosed) so it may
> be something real somewhere but it may also be just some HW glitch or
> something like that.

Is it possible this is just a lockdep false positive? Is it possible
that there is a pre-recorded lock dependency chain that happens outside
of the transaction and that clashes with this one?

I do not remember any recent changes in the way how scope API is handled
except for CMA scope API changes but those should be pretty much
independent.
-- 
Michal Hocko
SUSE Labs


Re: possible deadlock in start_this_handle (2)

2021-02-11 Thread Jan Kara
Hello,

added mm guys to CC.

On Wed 10-02-21 05:35:18, syzbot wrote:
> HEAD commit:1e0d27fc Merge branch 'akpm' (patches from Andrew)
> git tree:   upstream
> console output: https://syzkaller.appspot.com/x/log.txt?x=15cbce90d0
> kernel config:  https://syzkaller.appspot.com/x/.config?x=bd1f72220b2e57eb
> dashboard link: https://syzkaller.appspot.com/bug?extid=bfdded10ab7dcd7507ae
> userspace arch: i386
> 
> Unfortunately, I don't have any reproducer for this issue yet.
> 
> IMPORTANT: if you fix the issue, please add the following tag to the commit:
> Reported-by: syzbot+bfdded10ab7dcd750...@syzkaller.appspotmail.com
> 
> ==
> WARNING: possible circular locking dependency detected
> 5.11.0-rc6-syzkaller #0 Not tainted
> --
> kswapd0/2246 is trying to acquire lock:
> 888041a988e0 (jbd2_handle){}-{0:0}, at: 
> start_this_handle+0xf81/0x1380 fs/jbd2/transaction.c:444
> 
> but task is already holding lock:
> 8be892c0 (fs_reclaim){+.+.}-{0:0}, at: __fs_reclaim_acquire+0x0/0x30 
> mm/page_alloc.c:5195
> 
> which lock already depends on the new lock.
> 
> the existing dependency chain (in reverse order) is:
> 
> -> #2 (fs_reclaim){+.+.}-{0:0}:
>__fs_reclaim_acquire mm/page_alloc.c:4326 [inline]
>fs_reclaim_acquire+0x117/0x150 mm/page_alloc.c:4340
>might_alloc include/linux/sched/mm.h:193 [inline]
>slab_pre_alloc_hook mm/slab.h:493 [inline]
>slab_alloc_node mm/slub.c:2817 [inline]
>__kmalloc_node+0x5f/0x430 mm/slub.c:4015
>kmalloc_node include/linux/slab.h:575 [inline]
>kvmalloc_node+0x61/0xf0 mm/util.c:587
>kvmalloc include/linux/mm.h:781 [inline]
>ext4_xattr_inode_cache_find fs/ext4/xattr.c:1465 [inline]
>ext4_xattr_inode_lookup_create fs/ext4/xattr.c:1508 [inline]
>ext4_xattr_set_entry+0x1ce6/0x3780 fs/ext4/xattr.c:1649
>ext4_xattr_ibody_set+0x78/0x2b0 fs/ext4/xattr.c:2224
>ext4_xattr_set_handle+0x8f4/0x13e0 fs/ext4/xattr.c:2380
>ext4_xattr_set+0x13a/0x340 fs/ext4/xattr.c:2493
>ext4_xattr_user_set+0xbc/0x100 fs/ext4/xattr_user.c:40
>__vfs_setxattr+0x10e/0x170 fs/xattr.c:177
>__vfs_setxattr_noperm+0x11a/0x4c0 fs/xattr.c:208
>__vfs_setxattr_locked+0x1bf/0x250 fs/xattr.c:266
>vfs_setxattr+0x135/0x320 fs/xattr.c:291
>setxattr+0x1ff/0x290 fs/xattr.c:553
>path_setxattr+0x170/0x190 fs/xattr.c:572
>__do_sys_setxattr fs/xattr.c:587 [inline]
>__se_sys_setxattr fs/xattr.c:583 [inline]
>__ia32_sys_setxattr+0xbc/0x150 fs/xattr.c:583
>do_syscall_32_irqs_on arch/x86/entry/common.c:77 [inline]
>__do_fast_syscall_32+0x56/0x80 arch/x86/entry/common.c:139
>do_fast_syscall_32+0x2f/0x70 arch/x86/entry/common.c:164
>entry_SYSENTER_compat_after_hwframe+0x4d/0x5c

This stacktrace should never happen. ext4_xattr_set() starts a transaction.
That internally goes through start_this_handle() which calls:

handle->saved_alloc_context = memalloc_nofs_save();

and we restore the allocation context only in stop_this_handle() when
stopping the handle. And with this fs_reclaim_acquire() should remove
__GFP_FS from the mask and not call __fs_reclaim_acquire().

Now I have no idea why something here didn't work out. Given we don't have
a reproducer it will be probably difficult to debug this. I'd note that
about year and half ago similar report happened (got autoclosed) so it may
be something real somewhere but it may also be just some HW glitch or
something like that.

Honza
-- 
Jan Kara 
SUSE Labs, CR


possible deadlock in start_this_handle (2)

2021-02-10 Thread syzbot
Hello,

syzbot found the following issue on:

HEAD commit:1e0d27fc Merge branch 'akpm' (patches from Andrew)
git tree:   upstream
console output: https://syzkaller.appspot.com/x/log.txt?x=15cbce90d0
kernel config:  https://syzkaller.appspot.com/x/.config?x=bd1f72220b2e57eb
dashboard link: https://syzkaller.appspot.com/bug?extid=bfdded10ab7dcd7507ae
userspace arch: i386

Unfortunately, I don't have any reproducer for this issue yet.

IMPORTANT: if you fix the issue, please add the following tag to the commit:
Reported-by: syzbot+bfdded10ab7dcd750...@syzkaller.appspotmail.com

==
WARNING: possible circular locking dependency detected
5.11.0-rc6-syzkaller #0 Not tainted
--
kswapd0/2246 is trying to acquire lock:
ffff888041a988e0 (jbd2_handle){++++}-{0:0}, at: start_this_handle+0xf81/0x1380 
fs/jbd2/transaction.c:444

but task is already holding lock:
ffffffff8be892c0 (fs_reclaim){+.+.}-{0:0}, at: __fs_reclaim_acquire+0x0/0x30 
mm/page_alloc.c:5195

which lock already depends on the new lock.


the existing dependency chain (in reverse order) is:

-> #2 (fs_reclaim){+.+.}-{0:0}:
   __fs_reclaim_acquire mm/page_alloc.c:4326 [inline]
   fs_reclaim_acquire+0x117/0x150 mm/page_alloc.c:4340
   might_alloc include/linux/sched/mm.h:193 [inline]
   slab_pre_alloc_hook mm/slab.h:493 [inline]
   slab_alloc_node mm/slub.c:2817 [inline]
   __kmalloc_node+0x5f/0x430 mm/slub.c:4015
   kmalloc_node include/linux/slab.h:575 [inline]
   kvmalloc_node+0x61/0xf0 mm/util.c:587
   kvmalloc include/linux/mm.h:781 [inline]
   ext4_xattr_inode_cache_find fs/ext4/xattr.c:1465 [inline]
   ext4_xattr_inode_lookup_create fs/ext4/xattr.c:1508 [inline]
   ext4_xattr_set_entry+0x1ce6/0x3780 fs/ext4/xattr.c:1649
   ext4_xattr_ibody_set+0x78/0x2b0 fs/ext4/xattr.c:2224
   ext4_xattr_set_handle+0x8f4/0x13e0 fs/ext4/xattr.c:2380
   ext4_xattr_set+0x13a/0x340 fs/ext4/xattr.c:2493
   ext4_xattr_user_set+0xbc/0x100 fs/ext4/xattr_user.c:40
   __vfs_setxattr+0x10e/0x170 fs/xattr.c:177
   __vfs_setxattr_noperm+0x11a/0x4c0 fs/xattr.c:208
   __vfs_setxattr_locked+0x1bf/0x250 fs/xattr.c:266
   vfs_setxattr+0x135/0x320 fs/xattr.c:291
   setxattr+0x1ff/0x290 fs/xattr.c:553
   path_setxattr+0x170/0x190 fs/xattr.c:572
   __do_sys_setxattr fs/xattr.c:587 [inline]
   __se_sys_setxattr fs/xattr.c:583 [inline]
   __ia32_sys_setxattr+0xbc/0x150 fs/xattr.c:583
   do_syscall_32_irqs_on arch/x86/entry/common.c:77 [inline]
   __do_fast_syscall_32+0x56/0x80 arch/x86/entry/common.c:139
   do_fast_syscall_32+0x2f/0x70 arch/x86/entry/common.c:164
   entry_SYSENTER_compat_after_hwframe+0x4d/0x5c

-> #1 (&ei->xattr_sem){++++}-{3:3}:
   down_read+0x95/0x440 kernel/locking/rwsem.c:1353
   ext4_setattr+0x570/0x1fd0 fs/ext4/inode.c:5375
   notify_change+0xb60/0x10a0 fs/attr.c:336
   chown_common+0x4a9/0x550 fs/open.c:674
   vfs_fchown fs/open.c:741 [inline]
   vfs_fchown fs/open.c:733 [inline]
   ksys_fchown+0x111/0x170 fs/open.c:752
   __do_sys_fchown fs/open.c:760 [inline]
   __se_sys_fchown fs/open.c:758 [inline]
   __x64_sys_fchown+0x6f/0xb0 fs/open.c:758
   do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46
   entry_SYSCALL_64_after_hwframe+0x44/0xa9

-> #0 (jbd2_handle){++++}-{0:0}:
   check_prev_add kernel/locking/lockdep.c:2868 [inline]
   check_prevs_add kernel/locking/lockdep.c:2993 [inline]
   validate_chain kernel/locking/lockdep.c:3608 [inline]
   __lock_acquire+0x2b26/0x54f0 kernel/locking/lockdep.c:4832
   lock_acquire kernel/locking/lockdep.c:5442 [inline]
   lock_acquire+0x1a8/0x720 kernel/locking/lockdep.c:5407
   start_this_handle+0xfb4/0x1380 fs/jbd2/transaction.c:446
   jbd2__journal_start+0x399/0x930 fs/jbd2/transaction.c:503
   __ext4_journal_start_sb+0x227/0x4a0 fs/ext4/ext4_jbd2.c:105
   __ext4_journal_start fs/ext4/ext4_jbd2.h:320 [inline]
   ext4_dirty_inode+0xbc/0x130 fs/ext4/inode.c:5951
   __mark_inode_dirty+0x81f/0x1070 fs/fs-writeback.c:2262
   mark_inode_dirty_sync include/linux/fs.h:2186 [inline]
   iput.part.0+0x57/0x810 fs/inode.c:1676
   iput+0x58/0x70 fs/inode.c:1669
   dentry_unlink_inode+0x2b1/0x3d0 fs/dcache.c:374
   __dentry_kill+0x3c0/0x640 fs/dcache.c:579
   shrink_dentry_list+0x144/0x480 fs/dcache.c:1148
   prune_dcache_sb+0xe7/0x140 fs/dcache.c:1229
   super_cache_scan+0x336/0x590 fs/super.c:105
   do_shrink_slab+0x3e4/0x9f0 mm/vmscan.c:511
   shrink_slab+0x16f/0x5d0 mm/vmscan.c:672
   shrink_node_memcgs mm/vmscan.c:2665 [inline]
   shrink_node+0x8cc/0x1de0 mm/vmscan.c:2780
   kswapd_shrink_node mm/vmscan.c:3523 [inline]
   balance_pgdat+0x745/0x1270 mm/vmscan.c:3681
   kswapd+0x5b1/0xdb0 mm/vmscan.c:3938
   kthread+0x3b1/0x4a0 kernel/kthread.c:292