I just upgraded to the latest ZoL release and am still having the same
problem. I also upgraded my pool (after creating a checkpoint).
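
For reference, the checkpoint/upgrade sequence was along these lines (a
sketch only; "tank" stands in for my actual pool name):

```
# Take a checkpoint first so the pool can be rewound if the upgrade
# misbehaves (rewind would be done via 'zpool import --rewind-to-checkpoint').
$ sudo zpool checkpoint tank

# Enable the feature flags supported by the newly installed ZFS release.
$ sudo zpool upgrade tank

# Confirm the checkpoint exists and how much space it is holding.
$ zpool get checkpoint tank
```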

$ modinfo zfs | head -12
filename:       /lib/modules/5.4.0-66-generic/updates/dkms/zfs.ko
version:        2.0.3-0york0~20.04
license:        CDDL
author:         OpenZFS
description:    ZFS
alias:          devname:zfs
alias:          char-major-10-249
srcversion:     DCE77834FDF1B30075B1328
depends:        spl,znvpair,icp,zlua,zzstd,zunicode,zcommon,zavl
retpoline:      Y
name:           zfs
vermagic:       5.4.0-66-generic SMP mod_unload

$ dpkg -l | grep zfs
ii  libzfs2linux                         2.0.3-0york0~20.04                amd64        OpenZFS filesystem library for Linux
ii  zfs-dkms                             2.0.3-0york0~20.04                all          OpenZFS filesystem kernel modules for Linux
ii  zfs-zed                              0.8.3-1ubuntu12.6                 amd64        OpenZFS Event Daemon
ii  zfsutils-linux                       2.0.3-0york0~20.04                amd64        command-line tools to manage OpenZFS filesystems

$ zfs --version
zfs-2.0.3-0york0~20.04
zfs-kmod-0.8.3-1ubuntu12.6
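
Cross-checking which module the kernel has actually loaded, versus what
dkms has installed, can be done with the usual checks (nothing here is
specific to my setup):

```
# Version string of the zfs module that is loaded right now.
$ cat /sys/module/zfs/version

# What dkms has built and installed for the running kernel.
$ dkms status zfs
```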


Panic:
```
Mar  7 20:29:28 home-nas kernel: [  181.778239] VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, zp, SA_HDL_SHARED, &zp->z_sa_hdl)) failed
Mar  7 20:29:28 home-nas kernel: [  181.778605] PANIC at zfs_znode.c:339:zfs_znode_sa_init()
Mar  7 20:29:28 home-nas kernel: [  181.778778] Showing stack for process 2854
Mar  7 20:29:28 home-nas kernel: [  181.778793] CPU: 0 PID: 2854 Comm: ls Tainted: P           OE     5.4.0-66-generic #74-Ubuntu
Mar  7 20:29:28 home-nas kernel: [  181.778796] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-48-gd9c812dda519-prebuilt.qemu.org 04/01/2014
Mar  7 20:29:28 home-nas kernel: [  181.778798] Call Trace:
Mar  7 20:29:28 home-nas kernel: [  181.778948]  dump_stack+0x6d/0x9a
Mar  7 20:29:28 home-nas kernel: [  181.779068]  spl_dumpstack+0x29/0x2b [spl]
Mar  7 20:29:28 home-nas kernel: [  181.779081]  spl_panic+0xd4/0xfc [spl]
Mar  7 20:29:28 home-nas kernel: [  181.779491]  ? __zfs_dbgmsg+0xe0/0x110 [zfs]
Mar  7 20:29:28 home-nas kernel: [  181.779624]  ? sa_cache_constructor+0x27/0x50 [zfs]
Mar  7 20:29:28 home-nas kernel: [  181.779643]  ? _cond_resched+0x19/0x30
Mar  7 20:29:28 home-nas kernel: [  181.779657]  ? mutex_lock+0x13/0x40
Mar  7 20:29:28 home-nas kernel: [  181.779760]  ? dmu_buf_replace_user+0x60/0x80 [zfs]
Mar  7 20:29:28 home-nas kernel: [  181.779863]  ? dmu_buf_set_user_ie+0x1a/0x20 [zfs]
Mar  7 20:29:28 home-nas kernel: [  181.780000]  zfs_znode_sa_init.isra.0+0xdf/0xf0 [zfs]
Mar  7 20:29:28 home-nas kernel: [  181.780138]  zfs_znode_alloc+0x102/0x6d0 [zfs]
Mar  7 20:29:28 home-nas kernel: [  181.780249]  ? aggsum_add+0x196/0x1b0 [zfs]
Mar  7 20:29:28 home-nas kernel: [  181.780343]  ? dmu_buf_unlock_parent+0x38/0x80 [zfs]
Mar  7 20:29:28 home-nas kernel: [  181.780429]  ? dbuf_read_impl.constprop.0+0x614/0x700 [zfs]
Mar  7 20:29:28 home-nas kernel: [  181.780442]  ? spl_kmem_cache_alloc+0xc1/0x7d0 [spl]
Mar  7 20:29:28 home-nas kernel: [  181.780447]  ? _cond_resched+0x19/0x30
Mar  7 20:29:28 home-nas kernel: [  181.780452]  ? mutex_lock+0x13/0x40
Mar  7 20:29:28 home-nas kernel: [  181.780537]  ? aggsum_add+0x196/0x1b0 [zfs]
Mar  7 20:29:28 home-nas kernel: [  181.780541]  ? _cond_resched+0x19/0x30
Mar  7 20:29:28 home-nas kernel: [  181.780546]  ? _cond_resched+0x19/0x30
Mar  7 20:29:28 home-nas kernel: [  181.780551]  ? down_read+0x13/0xa0
Mar  7 20:29:28 home-nas kernel: [  181.780638]  ? dbuf_read+0x1d0/0x520 [zfs]
Mar  7 20:29:28 home-nas kernel: [  181.780643]  ? _cond_resched+0x19/0x30
Mar  7 20:29:28 home-nas kernel: [  181.780647]  ? mutex_lock+0x13/0x40
Mar  7 20:29:28 home-nas kernel: [  181.780746]  ? dnode_rele_and_unlock+0x6c/0xe0 [zfs]
Mar  7 20:29:28 home-nas kernel: [  181.780751]  ? _cond_resched+0x19/0x30
Mar  7 20:29:28 home-nas kernel: [  181.780755]  ? mutex_lock+0x13/0x40
Mar  7 20:29:28 home-nas kernel: [  181.780847]  ? dmu_object_info_from_dnode+0x84/0xb0 [zfs]
Mar  7 20:29:28 home-nas kernel: [  181.780975]  zfs_zget+0x1c3/0x270 [zfs]
Mar  7 20:29:28 home-nas kernel: [  181.781104]  zfs_dirent_lock+0x34b/0x680 [zfs]
Mar  7 20:29:28 home-nas kernel: [  181.781232]  zfs_dirlook+0x90/0x2b0 [zfs]
Mar  7 20:29:28 home-nas kernel: [  181.781358]  ? zfs_zaccess+0x153/0x410 [zfs]
Mar  7 20:29:28 home-nas kernel: [  181.781484]  zfs_lookup+0x1fd/0x3f0 [zfs]
Mar  7 20:29:28 home-nas kernel: [  181.781608]  zpl_lookup+0xc9/0x1e0 [zfs]
Mar  7 20:29:28 home-nas kernel: [  181.781662]  ? security_capable+0x3d/0x60
Mar  7 20:29:28 home-nas kernel: [  181.781697]  __lookup_slow+0x92/0x160
Mar  7 20:29:28 home-nas kernel: [  181.781703]  lookup_slow+0x3b/0x60
Mar  7 20:29:28 home-nas kernel: [  181.781709]  walk_component+0x1da/0x360
Mar  7 20:29:28 home-nas kernel: [  181.781714]  ? link_path_walk.part.0+0x6d/0x550
Mar  7 20:29:28 home-nas kernel: [  181.781720]  path_lookupat.isra.0+0x80/0x230
Mar  7 20:29:28 home-nas kernel: [  181.781758]  ? kfree+0x231/0x250
Mar  7 20:29:28 home-nas kernel: [  181.781765]  filename_lookup+0xae/0x170
Mar  7 20:29:28 home-nas kernel: [  181.781780]  ? __check_object_size+0x13f/0x150
Mar  7 20:29:28 home-nas kernel: [  181.781808]  ? strncpy_from_user+0x4c/0x150
Mar  7 20:29:28 home-nas kernel: [  181.781815]  user_path_at_empty+0x3a/0x50
Mar  7 20:29:28 home-nas kernel: [  181.781819]  vfs_statx+0x7d/0xe0
Mar  7 20:29:28 home-nas kernel: [  181.781823]  ? strncpy_from_user+0x4c/0x150
Mar  7 20:29:28 home-nas kernel: [  181.781827]  __do_sys_newlstat+0x3e/0x80
Mar  7 20:29:28 home-nas kernel: [  181.781841]  ? mntput+0x24/0x40
Mar  7 20:29:28 home-nas kernel: [  181.781846]  ? path_put+0x1e/0x30
Mar  7 20:29:28 home-nas kernel: [  181.781849]  ? path_getxattr+0x70/0xb0
Mar  7 20:29:28 home-nas kernel: [  181.781854]  __x64_sys_newlstat+0x16/0x20
Mar  7 20:29:28 home-nas kernel: [  181.781895]  do_syscall_64+0x57/0x190
Mar  7 20:29:28 home-nas kernel: [  181.781903]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
Mar  7 20:29:28 home-nas kernel: [  181.781929] RIP: 0033:0x7ff4b670b6ea
Mar  7 20:29:28 home-nas kernel: [  181.781950] Code: ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 f3 0f 1e fa 41 89 f8 48 89 f7 48 89 d6 41 83 f8 01 77 2d b8 06 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 06 c3 0f 1f 44 00 00 48 8b 15 71 a7 0d 00 f7
Mar  7 20:29:28 home-nas kernel: [  181.781953] RSP: 002b:00007ffc67e30078 EFLAGS: 00000246 ORIG_RAX: 0000000000000006
Mar  7 20:29:28 home-nas kernel: [  181.781965] RAX: ffffffffffffffda RBX: 00007ff4b6362b20 RCX: 00007ff4b670b6ea
Mar  7 20:29:28 home-nas kernel: [  181.781968] RDX: 00007ff4b6362b38 RSI: 00007ff4b6362b38 RDI: 00007ffc67e30080
Mar  7 20:29:28 home-nas kernel: [  181.781970] RBP: 00007ffc67e30420 R08: 0000000000000001 R09: 00000000f1c38400
Mar  7 20:29:28 home-nas kernel: [  181.781973] R10: 00007ffc67e30080 R11: 0000000000000246 R12: 00005639f1c384d3
Mar  7 20:29:28 home-nas kernel: [  181.781975] R13: 0000000000000005 R14: 00007ffc67e30080 R15: 00007ff4b6362b38
```

https://bugs.launchpad.net/bugs/1906476

Title:
  PANIC at zfs_znode.c:335:zfs_znode_sa_init() // VERIFY(0 ==
  sa_handle_get_from_db(zfsvfs->z_os, db, zp, SA_HDL_SHARED,
  &zp->z_sa_hdl)) failed

Status in Native ZFS for Linux:
  New
Status in zfs-linux package in Ubuntu:
  Fix Released

Bug description:
  Since today, while running Ubuntu 21.04 Hirsute, I started getting a ZFS
  panic in the kernel log which was also hanging disk I/O for all
  Chrome/Electron apps.

  I have narrowed down a few important notes:
  - It does not happen with module version 0.8.4-1ubuntu11 built and
  included with 5.8.0-29-generic

  - It was happening when using zfs-dkms 0.8.4-1ubuntu16 built with DKMS
  on the same kernel and also on 5.8.18-acso (a custom kernel).

  - For whatever reason multiple Chrome/Electron apps were affected,
  specifically Discord, Chrome and Mattermost. I was unable to strace the
  processes, so it was hard to confirm 100%, but judging from /proc/PID/fd
  and the hanging "ls" they appeared to be hung trying to open files in
  their 'Cache' directory, e.g. ~/.cache/google-chrome/Default/Cache and
  ~/.config/Mattermost/Cache. While the issue was going on I could not list
  those directories either; "ls" would just hang (see the sketch after
  these notes).

  - Once I removed zfs-dkms in order to revert to the kernel built-in
  version, everything immediately worked again without changing anything,
  removing files, etc.

  - It happened every time, over multiple reboots and kernels; all my
  Chrome apps stopped working, but for whatever reason nothing else seemed
  affected.

  - It would log a series of spl_panic dumps into kern.log that look like this:
  Dec  2 12:36:42 optane kernel: [   72.857033] VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, zp, SA_HDL_SHARED, &zp->z_sa_hdl)) failed
  Dec  2 12:36:42 optane kernel: [   72.857036] PANIC at zfs_znode.c:335:zfs_znode_sa_init()

  I could only find one other Google reference to this issue, with two
  other users reporting the same error but on 20.04, here:
  https://github.com/openzfs/zfs/issues/10971

  - I was not experiencing the issue on 0.8.4-1ubuntu14, and I am fairly
  sure it was working on 0.8.4-1ubuntu15 but broke after the upgrade to
  0.8.4-1ubuntu16. I will reinstall those zfs-dkms versions to verify
  that.
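
  A quick way to confirm this kind of hang, for anyone seeing the same
  thing (a sketch only; the PID 12345 and the Chrome cache path below are
  placeholders):

```
# Find tasks stuck in uninterruptible sleep (state 'D'), which is how the
# hung lookups show up.
$ ps -eo pid,stat,comm | awk '$2 ~ /D/'

# List the file descriptors the hung process already has open; the Cache
# directory shows up here even though new opens never complete.
$ ls -l /proc/12345/fd

# The kernel-side stack of the hung task should point into the zfs_lookup
# path if it is the same hang.
$ sudo cat /proc/12345/stack

# Listing the directory directly also hangs and cannot be interrupted,
# because the task is in state 'D'.
$ ls ~/.cache/google-chrome/Default/Cache
```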

  There were a few originating call stacks, but the first one I hit was:

  Call Trace:
   dump_stack+0x74/0x95
   spl_dumpstack+0x29/0x2b [spl]
   spl_panic+0xd4/0xfc [spl]
   ? sa_cache_constructor+0x27/0x50 [zfs]
   ? _cond_resched+0x19/0x40
   ? mutex_lock+0x12/0x40
   ? dmu_buf_set_user_ie+0x54/0x80 [zfs]
   zfs_znode_sa_init+0xe0/0xf0 [zfs]
   zfs_znode_alloc+0x101/0x700 [zfs]
   ? arc_buf_fill+0x270/0xd30 [zfs]
   ? __cv_init+0x42/0x60 [spl]
   ? dnode_cons+0x28f/0x2a0 [zfs]
   ? _cond_resched+0x19/0x40
   ? _cond_resched+0x19/0x40
   ? mutex_lock+0x12/0x40
   ? aggsum_add+0x153/0x170 [zfs]
   ? spl_kmem_alloc_impl+0xd8/0x110 [spl]
   ? arc_space_consume+0x54/0xe0 [zfs]
   ? dbuf_read+0x4a0/0xb50 [zfs]
   ? _cond_resched+0x19/0x40
   ? mutex_lock+0x12/0x40
   ? dnode_rele_and_unlock+0x5a/0xc0 [zfs]
   ? _cond_resched+0x19/0x40
   ? mutex_lock+0x12/0x40
   ? dmu_object_info_from_dnode+0x84/0xb0 [zfs]
   zfs_zget+0x1c3/0x270 [zfs]
   ? dmu_buf_rele+0x3a/0x40 [zfs]
   zfs_dirent_lock+0x349/0x680 [zfs]
   zfs_dirlook+0x90/0x2a0 [zfs]
   ? zfs_zaccess+0x10c/0x480 [zfs]
   zfs_lookup+0x202/0x3b0 [zfs]
   zpl_lookup+0xca/0x1e0 [zfs]
   path_openat+0x6a2/0xfe0
   do_filp_open+0x9b/0x110
   ? __check_object_size+0xdb/0x1b0
   ? __alloc_fd+0x46/0x170
   do_sys_openat2+0x217/0x2d0
   ? do_sys_openat2+0x217/0x2d0
   do_sys_open+0x59/0x80
   __x64_sys_openat+0x20/0x30

