Some weeks ago I started my fs stress test (5 x fsstress+fsx+dbench) on a log-enabled ffs1 file system backed by md(4).
Usually within hours I get a deadlock where a thread is waiting on "genput" but the page in question is neither BUSY nor WANTED. I believe I have tracked it down (*1) to three places where we change page flags without holding the object lock. With this diff (*2) in place the test runs for more than 48 hours. Using atomic ops here is not possible because flags is a 16-bit value. Any objections against the change, or other ideas to solve this problem?

-- Juergen Hannken-Illjes - [email protected] - TU Braunschweig (Germany)

*1) I added tracking code to the pages, 8 entries per cpu, and got this:

cpu/idx ticks   lwp      old flag   new flag where
0/3:    1198948 e1bcb7e0 000d B- -> 000c --  uvm_page_unbusy:1532
0/4:    1198966 e1d6aa80 000c -- -> 000c --  genfs_do_putpages:1078
0/5:    1198979 e50c2d40 000c -- -> 000c --  genfs_do_putpages:1078
0/6:    1199425 e26e3020 0045 B- -> 0045 B-  genfs_getpages:648 -CR
0/7:    1199425 e26e3020 0045 B- -> 0005 B-  genfs_getpages:703 -F
0/0:    1199425 e26e3020 0005 B- -> 0005 B-  uvm_fault_lower_done:2258 = !w
0/1:    1199425 e26e3020 0005 B- -> 0004 --  uvm_fault_lower_done:2266 -BFW
0/2:    1199426 e1bcb7e0 000d B- -> 000c --  uvm_page_unbusy:1532 -BW
1/6:    1198937 e2473000 0005 B- -> 0005 B-  uvm_fault_lower_done:2258
1/7:    1198937 e2473000 0005 B- -> 0004 --  uvm_fault_lower_done:2266
1/0:    1198937 e2473000 000c -- -> 000d B-  uvn_findpage:308
1/1:    1199425 e1ca2aa0 0045 B- -> 0045 B-  genfs_do_putpages:1010 +W
1/2:    1199425 e1ca2aa0 0045 B- -> 0045 B-  genfs_do_putpages:1023 =
1/3:    1199425 e1ca2aa0 0045 B- -> 0045 B-  genfs_do_putpages:1025 =
1/4:    1199425 e26e3020 0004 -- -> 000c --  genfs_do_putpages:1078 +C
1/5:    1199425 e26e3020 000c -- -> 000d B-  genfs_do_putpages:1094 +B

Entry 1/1 should set the WANTED flag but apparently doesn't. It looks like entry 0/6 clobbered the flags here.
*2) My current diff: Index: sys/miscfs/genfs/genfs_io.c =================================================================== RCS file: /cvsroot/src/sys/miscfs/genfs/genfs_io.c,v retrieving revision 1.43 diff -p -u -4 -r1.43 genfs_io.c --- sys/miscfs/genfs/genfs_io.c 19 Nov 2010 05:38:10 -0000 1.43 +++ sys/miscfs/genfs/genfs_io.c 22 Nov 2010 09:11:15 -0000 @@ -568,16 +568,18 @@ startover: memset((char *)kva + (offset - startoffset), 0, iobytes); skipbytes += iobytes; + mutex_enter(&uobj->vmobjlock); for (i = 0; i < holepages; i++) { if (memwrite) { pgs[pidx + i]->flags &= ~PG_CLEAN; } if (!blockalloc) { pgs[pidx + i]->flags |= PG_RDONLY; } } + mutex_exit(&uobj->vmobjlock); continue; } /* @@ -637,8 +639,9 @@ loopdone: npages << PAGE_SHIFT, 0, cred); UVMHIST_LOG(ubchist, "gop_alloc off 0x%x/0x%x -> %d", startoffset, npages << PAGE_SHIFT, error,0); if (!error) { + mutex_enter(&uobj->vmobjlock); for (i = 0; i < npages; i++) { struct vm_page *pg = pgs[i]; if (pg == NULL) { @@ -647,8 +650,9 @@ loopdone: pg->flags &= ~(PG_CLEAN|PG_RDONLY); UVMHIST_LOG(ubchist, "mark dirty pg %p", pg,0,0,0); } + mutex_exit(&uobj->vmobjlock); } } if (!glocked) { genfs_node_unlock(vp); Index: sys/uvm/uvm_bio.c =================================================================== RCS file: /cvsroot/src/sys/uvm/uvm_bio.c,v retrieving revision 1.70 diff -p -u -4 -r1.70 uvm_bio.c --- sys/uvm/uvm_bio.c 22 Jun 2010 18:34:50 -0000 1.70 +++ sys/uvm/uvm_bio.c 22 Nov 2010 09:11:18 -0000 @@ -644,8 +644,9 @@ ubc_release(void *va, int flags) if (zerolen) { memset((char *)umapva + endoff, 0, zerolen); } umap->flags &= ~UMAP_PAGES_LOCKED; + mutex_enter(&uobj->vmobjlock); mutex_enter(&uvm_pageqlock); for (i = 0; i < npages; i++) { rv = pmap_extract(pmap_kernel(), umapva + slot_offset + (i << PAGE_SHIFT), &pa); @@ -657,9 +658,8 @@ ubc_release(void *va, int flags) } mutex_exit(&uvm_pageqlock); pmap_kremove(umapva, ubc_winsize); pmap_update(pmap_kernel()); - mutex_enter(&uobj->vmobjlock); uvm_page_unbusy(pgs, 
npages); mutex_exit(&uobj->vmobjlock); unmapped = true; } else {
