Hi.

Kris Kennaway <kris at FreeBSD.org> found a deadlock, which I think is not
FreeBSD-specific.

When we are running low in memory and kmem_alloc(KM_SLEEP) is called,
the thread waits for the memory to be reclaimed, right?
In such situation the arc_reclaim_thread thread is woken up.

Ok. I've two threads waiting for the memory to be freed:

First one, and this one is not really problematic:

arc_lowmem(0,0,a054c56b,12c,a0a74088,...) at arc_lowmem+0x74
kmem_malloc(a0a71090,20000,2,e6768840,a047e7e5,...) at kmem_malloc+0x131
page_alloc(0,20000,e6768833,2,aedd9d20,...) at page_alloc+0x27
uma_large_malloc(20000,2,0,0,0,...) at uma_large_malloc+0x55
malloc(20000,aedd6080,2,e6768888,aedb7159,...) at malloc+0x120
zfs_kmem_alloc(20000,2,e67688b8,aed791db,20000,...) at zfs_kmem_alloc+0x13
zio_data_buf_alloc(20000,aedd9cc0,20000,1,20000,...) at zio_data_buf_alloc+0xd
arc_get_data_buf(ae166dc0,2,ca220690,b1f37450,e6768928,...) at 
arc_get_data_buf+0x23f
arc_buf_alloc(ae18d000,20000,ca220690,1,0,...) at arc_buf_alloc+0x9a
dbuf_read(ca220690,b1f37450,2,bf0933a0,1a7600,...) at dbuf_read+0xf4
dmu_tx_check_ioerr(0,d,0,a0a74880,0,...) at dmu_tx_check_ioerr+0x6c
dmu_tx_count_write(197600,0,10000,0,197600,...) at dmu_tx_count_write+0x3ce
dmu_tx_hold_write(bad1f800,5d52,0,197600,0,...) at dmu_tx_hold_write+0x50
zfs_freebsd_write(e6768b90,a055a4d5,0,0,0,...) at zfs_freebsd_write+0x1cf
VOP_WRITE_APV(aedd8540,e6768b90,b608b1d0,a053f500,241,...) at 
VOP_WRITE_APV+0x17c
vn_write(ae95e630,e6768c58,c1a72680,0,b608b1d0,...) at vn_write+0x250
dofilewrite(b608b1d0,4,ae95e630,e6768c58,ffffffff,...) at dofilewrite+0x9e
kern_writev(b608b1d0,4,e6768c58,805f000,10000,...) at kern_writev+0x60
write(b608b1d0,e6768d00,c,e6768c94,a034f435,...) at write+0x4f
syscall(e6768d38) at syscall+0x2f3

And second one, which holds arc_buf_t->b_hdr->hash_lock:

arc_lowmem(0,0,a054c56b,12c,a0a74088,...) at arc_lowmem+0x1c
kmem_malloc(a0a71090,20000,2,e69888b8,a047e7e5,...) at kmem_malloc+0x131
page_alloc(0,20000,e69888ab,2,aedd9da0,...) at page_alloc+0x27
uma_large_malloc(20000,2,0,0,0,...) at uma_large_malloc+0x55
malloc(20000,aedd6080,2,e6988900,aedb7159,...) at malloc+0x120
zfs_kmem_alloc(20000,2,e6988930,aed791db,20000,...) at zfs_kmem_alloc+0x13
zio_data_buf_alloc(20000,aedd9cc0,20000,1,20000,...) at zio_data_buf_alloc+0xd
arc_get_data_buf(ae166dc0,2,20000,0,b8644cf8,...) at arc_get_data_buf+0x23f
arc_read(c29ec228,ae18d000,af885080,aed80b6c,aed7d168,...) at arc_read+0x33d
dbuf_read(baf49460,c29ec228,12,c5f8a528,c6254cb0,...) at dbuf_read+0x463
dmu_buf_hold_array_by_dnode(20000,0,400,0,1,...) at 
dmu_buf_hold_array_by_dnode+0x1b0
dmu_buf_hold_array(58ef,0,20000,0,400,...) at dmu_buf_hold_array+0x4c
dmu_read_uio(c23da3c0,58ef,0,e6988c58,400,...) at dmu_read_uio+0x35
zfs_freebsd_read(e6988b90,a055a48c,adbbd6c0,adbbd6c0,adbbd6c0,...) at 
zfs_freebsd_read+0x3d8
VOP_READ_APV(aedd8540,e6988b90,b6f4eae0,a053f500,202,...) at VOP_READ_APV+0xd2
vn_read(adbbd6c0,e6988c58,b0c58e00,0,b6f4eae0,...) at vn_read+0x297
dofileread(b6f4eae0,3,adbbd6c0,e6988c58,ffffffff,...) at dofileread+0xa7
kern_readv(b6f4eae0,3,e6988c58,9f7fc670,400,...) at kern_readv+0x60
read(b6f4eae0,e6988d00,c,e6988c94,a034f435,...) at read+0x4f
syscall(e6988d38) at syscall+0x2f3

The arc_reclaim_thread thread deadlocks here:

sched_switch(ae8f9910,0,1,17a,a05b6e14,...) at sched_switch+0x16c
mi_switch(1,0,a0537906,1cc,aede1b8c,...) at mi_switch+0x306
sleepq_switch(aede1b8c,0,a0537906,21e,e611abe0,...) at sleepq_switch+0x113
sleepq_wait(aede1b8c,0,aedcef22,3,0,...) at sleepq_wait+0x65
_sx_xlock_hard(aede1b8c,ae8f9910,aedcecb7,447,e611ac1c,...) at 
_sx_xlock_hard+0x17e
_sx_xlock(aede1b8c,aedcecb7,447,0,0,...) at _sx_xlock+0x69
arc_buf_remove_ref(b4288ca8,cc943c94,519,cc943cd0,cc943dac,...) at 
arc_buf_remove_ref+0x58
dbuf_rele(cc943c94,cc943dac,cc95d348,cc943dac,35,...) at dbuf_rele+0x195
dbuf_clear(cc943dac,cc943dac,e611ac80,aed7cea0,cc943dac,...) at dbuf_clear+0x7f
dbuf_evict(cc943dac,cc99a3d4,e611ac90,aed788b5,cc99a3d4,...) at dbuf_evict+0xd
dbuf_do_evict(cc99a3d4,4879,e611acfc,aed78f0f,64,...) at dbuf_do_evict+0x44
arc_do_user_evicts(64,0,246,a056b498,1,...) at arc_do_user_evicts+0x51
arc_reclaim_thread(0,e611ad38,a0531203,326,ae8f76c0,...) at 
arc_reclaim_thread+0x36b
fork_exit(aed78ba4,0,e611ad38) at fork_exit+0xd1
fork_trampoline() at fork_trampoline+0x8

Let me convert the offsets to file:line as found in OpenSolaris code:

arc.c:1088              arc_buf_remove_ref+0x58
dbuf.c:1710/1713        dbuf_rele+0x195
dbuf.c:1308             dbuf_clear+0x7f
dbuf.c:233              dbuf_evict+0xd
dbuf.c:1453             dbuf_do_evict+0x44
arc.c:1314              arc_do_user_evicts+0x51
arc.c:1537              arc_reclaim_thread+0x36b

(In this dbuf.c:1710/1713 I'm not sure which line exactly it is, but it
doesn't matter from what I see.)

The most important part is dbuf.c:1308, which calls dbuf_rele() on the
parent dmu_buf_impl_t, which is already locked by the second thread, so
when we lock it in arc_buf_remove_ref() we deadlock, because the lock
is held by the thread waiting for memory.

Does my description make sense? Do you have any suggestions how to fix it?

-- 
Pawel Jakub Dawidek                       http://www.wheel.pl
pjd at FreeBSD.org                           http://www.FreeBSD.org
FreeBSD committer                         Am I Evil? Yes, I Am!
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 187 bytes
Desc: not available
URL: 
<http://mail.opensolaris.org/pipermail/zfs-code/attachments/20070428/44e6f82b/attachment.bin>

Reply via email to