On 2020-02-28 21:57, Todd C. Miller wrote:
This sounds like the loop in softdep_process_worklist() is never
exiting.  It shouldn't run for more than a second, though.

FreeBSD breaks out of the loop if process_worklist_item() can't
make progress.  You could try the following (untested) diff to see
if it changes the behavior.

After ~11h with your diff, the system was still up and running,
so I decided to reboot. It crashed while syncing disks.


syncing disks...uvm_fault(0xfffffd83a134d668, 0x20, 0, 1) -> e
kernel: page fault trap, code=0
Stopped at      handle_workitem_freefile+0x2a:  movq    0x20(%rax),%rcx
ddb{0}>

ddb{0}> trace
handle_workitem_freefile(fffffd8369e3be70) at handle_workitem_freefile+0x2a
process_worklist_item(ffff8000001b8800,40) at process_worklist_item+0x1c9
softdep_process_worklist(ffff8000001b8800) at softdep_process_worklist+0xed
softdep_flushworklist(ffff8000001b8800,ffff800022516218,ffff800022556d58) at softdep_flushworklist+0xb8
ffs_sync(ffff8000001b8800,1,0,fffffd841f7c2480,ffff800022556d58) at ffs_sync+0xdd
dounmount_leaf(ffff8000001b8800,80000,ffff800022556d58) at dounmount_leaf+0xaa
dounmount(ffff8000001b8800,80000,ffff800022556d58) at dounmount+0xfc
vfs_unmountall() at vfs_unmountall+0x8e
vfs_shutdown(ffff800022556d58) at vfs_shutdown+0x3b
boot(0) at boot+0x6c
reboot(0) at reboot+0x5c
sys_reboot(ffff800022556d58,ffff800022516470,ffff8000225164d0) at sys_reboot+0x7e
syscall(ffff800022516540) at syscall+0x389
Xsyscall() at Xsyscall+0x128
end of kernel
end trace frame: 0x7f7ffffee610, count: -14


ddb{0}> ps
   PID     TID   PPID    UID  S       FLAGS  WAIT          COMMAND
*47072  417851      1      0  7         0x3                reboot
 95768  197147      0      0  3     0x14200  bored         smr
 23039  211890      0      0  2     0x14200                zerothread
 35233   64895      0      0  3     0x14200  aiodoned      aiodoned
 71347  316690      0      0  3     0x14200  syncer        update
 44681  504129      0      0  3     0x14200  cleaner       cleaner
 90819  193944      0      0  3     0x14200  reaper        reaper
 96855  230075      0      0  3     0x14200  pgdaemon      pagedaemon
 47917   12510      0      0  3     0x14200  bored         crynlk
 20365   68713      0      0  3     0x14200  bored         crypto
 21791  476177      0      0  3     0x14200  usbtsk        usbtask
 70170  396785      0      0  3     0x14200  usbatsk       usbatsk
 44347   23905      0      0  3  0x40014200  acpi0         acpi0
 27912   30285      0      0  7  0x40014200                idle7
 22700  306163      0      0  7  0x40014200                idle6
 63705  100085      0      0  7  0x40014200                idle5
 17377  411898      0      0  7  0x40014200                idle4
 84659  510633      0      0  7  0x40014200                idle3
 54830  118160      0      0  7  0x40014200                idle2
 54818  330925      0      0  7  0x40014200                idle1
 70767  292562      0      0  3     0x14200  bored         sensors
 44681  504129      0      0  3     0x14200  cleaner       cleaner
 90819  193944      0      0  3     0x14200  reaper        reaper
 96855  230075      0      0  3     0x14200  pgdaemon      pagedaemon
 47917   12510      0      0  3     0x14200  bored         crynlk
 20365   68713      0      0  3     0x14200  bored         crypto
 21791  476177      0      0  3     0x14200  usbtsk        usbtask
 70170  396785      0      0  3     0x14200  usbatsk       usbatsk
 44347   23905      0      0  3  0x40014200  acpi0         acpi0
 27912   30285      0      0  7  0x40014200                idle7
 22700  306163      0      0  7  0x40014200                idle6
 63705  100085      0      0  7  0x40014200                idle5
 17377  411898      0      0  7  0x40014200                idle4
 84659  510633      0      0  7  0x40014200                idle3
 54830  118160      0      0  7  0x40014200                idle2
 54818  330925      0      0  7  0x40014200                idle1
 70767  292562      0      0  3     0x14200  bored         sensors
 59264  346295      0      0  3     0x14200  bored         softnet
 10461  133156      0      0  3     0x14200  bored         systqmp
  1863  323920      0      0  3     0x14200  bored         systq
 11411  181100      0      0  2  0x40014200                softclock
  3128  136983      0      0  3  0x40014200                idle0
     1   52990      0      0  3        0x82  wait          init
     0       0     -1      0  3     0x10200  scheduler     swapper


ddb{0}> show registers
rdi               0xfffffd8369e3be70
rsi                             0x40
rbp               0xffff800022515f50
rbx                              0x1
rdx               0xfe00000007ff1e3a
rcx                            0x202
rax                                0
r8                                 0
r9                0xffffffffffffffff
r10               0x208d459308eb8ce2
r11               0x572f46b22ffb6c93
r12                             0x40
r13                              0x1
r14               0xfffffd8369e3be70
r15               0xffff8000001b8800
rip               0xffffffff812fb35a    handle_workitem_freefile+0x2a
cs                               0x8
rflags                       0x10282    __ALIGN_SIZE+0xf282
rsp               0xffff800022515d70
ss                              0x10
handle_workitem_freefile+0x2a:  movq    0x20(%rax),%rcx


        -Mark

  - todd

Index: /sys/ufs/ffs/ffs_softdep.c
===================================================================
RCS file: /cvs/src/sys/ufs/ffs/ffs_softdep.c,v
retrieving revision 1.148
diff -u -p -u -r1.148 ffs_softdep.c
--- /sys/ufs/ffs/ffs_softdep.c  4 Feb 2020 04:09:11 -0000       1.148
+++ /sys/ufs/ffs/ffs_softdep.c  28 Feb 2020 20:42:39 -0000
@@ -591,7 +591,7 @@ int
  softdep_process_worklist(struct mount *matchmnt)
  {
        struct proc *p = CURPROC;
-       int matchcnt, loopcount;
+       int cnt, matchcnt, loopcount;
        struct timeval starttime;
/*
@@ -639,7 +639,9 @@ softdep_process_worklist(struct mount *m
        loopcount = 1;
        getmicrouptime(&starttime);
        while (num_on_worklist > 0) {
-               matchcnt += process_worklist_item(matchmnt, LK_NOWAIT);
+               if ((cnt = process_worklist_item(matchmnt, LK_NOWAIT)) == 0)
+                       break;
+               matchcnt += cnt;
/*
                 * If a umount operation wants to run the worklist


--
Mark Patruck ( mark at wrapped.cx )
GPG key 0xF2865E51 / 187F F6D3 EE04 1DCE 1C74  F644 0D3C F66F F286 5E51

https://www.wrapped.cx

Reply via email to