Hi, I've been following these forums for a long time but this is my first post.
I'm looking for some advice on debugging an issue. I've been looking at all
the bug reports and updates through b146 but I can't find a good match. I tried
the fix for 6937998 but it didn't help.
I'm running Nexenta NCP3. When I attempt a simple zfs send of my root pool
(syspool) > /dev/null, it sends all the volume streams, but then all I/O hangs
at the moment the send looks like it should be complete. I have to restart the
box at this point.
The following mdb output is from the hung system (from a savecore -L). I'm
still learning my way around mdb and kernel debugging so any suggestions on how
to track this down would be really appreciated. It seems like it's stuck
waiting for txg_wait_synced.
::ptree
ffffff02e8d97718 sshd
ffffff02e74c3570 sshd
ffffff02e8d95e48 tcsh
ffffff02d1cc3e20 bash
ffffff02e7f4a720 bash
ffffff02e6bec900 zfs
ffffff02e6bec900::walk thread
ffffff02d1954720
ffffff02d1954720::threadlist -v
ADDR PROC LWP CLS PRI WCHAN
ffffff02d1954720 ffffff02e6bec900 ffffff02cf543850 1 60 ffffff02cd54054a
PC: _resume_from_idle+0xf1 CMD: zfs send -Rvp syspool@20100824
stack pointer for thread ffffff02d1954720: ffffff0010b6ca90
[ ffffff0010b6ca90 _resume_from_idle+0xf1() ]
swtch+0x145()
cv_wait+0x61()
txg_wait_synced+0x7c()
dsl_sync_task_group_wait+0xee()
dsl_dataset_user_release+0x101()
zfs_ioc_release+0x51()
zfsdev_ioctl+0x177()
cdev_ioctl+0x45()
spec_ioctl+0x5a()
fop_ioctl+0x7b()
ioctl+0x18e()
sys_syscall32+0xff()
ffffff02d1954720::findstack -v
stack pointer for thread ffffff02d1954720: ffffff0010b6ca90
[ ffffff0010b6ca90 _resume_from_idle+0xf1() ]
ffffff0010b6cac0 swtch+0x145()
ffffff0010b6caf0 cv_wait+0x61(ffffff02cd54054a, ffffff02cd540510)
ffffff0010b6cb40 txg_wait_synced+0x7c(ffffff02cd540380, 9291)
ffffff0010b6cb80 dsl_sync_task_group_wait+0xee(ffffff02d0b1a868)
ffffff0010b6cc10 dsl_dataset_user_release+0x101(ffffff02d1336000,
ffffff02d1336400, ffffff02d1336c00, 1)
ffffff0010b6cc40 zfs_ioc_release+0x51(ffffff02d1336000)
ffffff0010b6ccc0 zfsdev_ioctl+0x177(b600000000, 5a32, 8045660, 100003,
ffffff02cd646588, ffffff0010b6cde4)
ffffff0010b6cd00 cdev_ioctl+0x45(b600000000, 5a32, 8045660, 100003,
ffffff02cd646588, ffffff0010b6cde4)
ffffff0010b6cd40 spec_ioctl+0x5a(ffffff02d17c3180, 5a32, 8045660, 100003,
ffffff02cd646588, ffffff0010b6cde4, 0)
ffffff0010b6cdc0 fop_ioctl+0x7b(ffffff02d17c3180, 5a32, 8045660, 100003,
ffffff02cd646588, ffffff0010b6cde4, 0)
ffffff0010b6cec0 ioctl+0x18e(3, 5a32, 8045660)
ffffff0010b6cf10 sys_syscall32+0xff()
ffffff02cd540380::print dsl_pool_t dp_tx
dp_tx = {
dp_tx.tx_cpu = 0xffffff02cd540680
dp_tx.tx_sync_lock = {
_opaque = [ 0 ]
}
dp_tx.tx_open_txg = 0x9292
dp_tx.tx_quiesced_txg = 0
dp_tx.tx_syncing_txg = 0x9291
dp_tx.tx_synced_txg = 0x9290
dp_tx.tx_sync_txg_waiting = 0x9292
dp_tx.tx_quiesce_txg_waiting = 0x9292
dp_tx.tx_sync_more_cv = {
_opaque = 0
}
dp_tx.tx_sync_done_cv = {
_opaque = 0x2
}
dp_tx.tx_quiesce_more_cv = {
_opaque = 0x1
}
dp_tx.tx_quiesce_done_cv = {
_opaque = 0
}
dp_tx.tx_timeout_cv = {
_opaque = 0
}
dp_tx.tx_exit_cv = {
_opaque = 0
}
dp_tx.tx_threads = 0x2
dp_tx.tx_exiting = 0
dp_tx.tx_sync_thread = 0xffffff000fa05c60
dp_tx.tx_quiesce_thread = 0xffffff000f9fcc60
dp_tx.tx_commit_cb_taskq = 0
ffffff02cd540380::print dsl_pool_t dp_tx.tx_sync_thread
dp_tx.tx_sync_thread = 0xffffff000fa05c60
0xffffff000fa05c60::findstack -v
stack pointer for thread ffffff000fa05c60: ffffff000fa05860
[ ffffff000fa05860 _resume_from_idle+0xf1() ]
ffffff000fa05890 swtch+0x145()
ffffff000fa058c0 cv_wait+0x61(ffffff000fa05e3e, ffffff000fa05e40)
ffffff000fa05900 delay_common+0xab(1)
ffffff000fa05940 delay+0xc4(1)
ffffff000fa05960 dnode_special_close+0x28(ffffff02e8aa2050)
ffffff000fa05990 dmu_objset_evict+0x160(ffffff02e5b91100)
ffffff000fa05a20 dsl_dataset_user_release_sync+0x52(ffffff02e000b928,
ffffff02d0b1a868, ffffff02e5b9c6e0)
ffffff000fa05a70 dsl_sync_task_group_sync+0xf3(ffffff02d0b1a868,
ffffff02e5b9c6e0)
ffffff000fa05af0 dsl_pool_sync+0x1ec(ffffff02cd540380, 9291)
ffffff000fa05ba0 spa_sync+0x37b(ffffff02cdd40b00, 9291)
ffffff000fa05c40 txg_sync_thread+0x247(ffffff02cd540380)
ffffff000fa05c50 thread_start+8()
::spa
ADDR STATE NAME
ffffff02cdd40b00 ACTIVE syspool
ffffff02cdd40b00::print spa_t spa_dsl_pool->dp_tx.tx_sync_thread|::findstack -v
stack pointer for thread ffffff000fa05c60: ffffff000fa05860
[ ffffff000fa05860 _resume_from_idle+0xf1() ]
ffffff000fa05890 swtch+0x145()
ffffff000fa058c0 cv_wait+0x61(ffffff000fa05e3e, ffffff000fa05e40)
ffffff000fa05900 delay_common+0xab(1)
ffffff000fa05940 delay+0xc4(1)
ffffff000fa05960 dnode_special_close+0x28(ffffff02e8aa2050)
ffffff000fa05990 dmu_objset_evict+0x160(ffffff02e5b91100)
ffffff000fa05a20 dsl_dataset_user_release_sync+0x52(ffffff02e000b928,
ffffff02d0b1a868, ffffff02e5b9c6e0)
ffffff000fa05a70 dsl_sync_task_group_sync+0xf3(ffffff02d0b1a868,
ffffff02e5b9c6e0)
ffffff000fa05af0 dsl_pool_sync+0x1ec(ffffff02cd540380, 9291)
ffffff000fa05ba0 spa_sync+0x37b(ffffff02cdd40b00, 9291)
ffffff000fa05c40 txg_sync_thread+0x247(ffffff02cd540380)
ffffff000fa05c50 thread_start+8()