On Tue, 03 Mar 2020 10:20:04 +0100, Mark Patruck wrote:

> After ~3 days with the system up and running, the crash after doing
> a "reboot" looks different. Now it's in handle_workitem_freeblocks(),
> according to objdump

The problem is that removed files were not actually getting removed
since process_worklist_item() was returning 0 even when it performed
work, causing the loop to exit early.  You can see this by running
df after removing files--the available blocks are returned to the
system very slowly if at all.

I've been testing the following diff which seems to be stable,
though it contains a couple different changes which should probably
be split up.

1) Return the count in process_worklist_item() regardless of the
   value of matchmnt (which can be removed from the args).

2) Don't pass LK_NOWAIT to process_worklist_item() when unmounting
   the file system.

3) Simpler starttime check in softdep_process_worklist() that
   is consistent with the syncer.  The check is now skipped if we
   are unmounting.

 - todd

Index: sys/kern/vfs_sync.c
===================================================================
RCS file: /cvs/src/sys/kern/vfs_sync.c,v
retrieving revision 1.63
diff -u -p -u -r1.63 vfs_sync.c
--- sys/kern/vfs_sync.c 20 Jan 2020 23:21:56 -0000      1.63
+++ sys/kern/vfs_sync.c 3 Mar 2020 14:51:15 -0000
@@ -53,7 +53,7 @@
 #include <sys/sched.h>
 
 #ifdef FFS_SOFTUPDATES
-int   softdep_process_worklist(struct mount *);
+int   softdep_process_worklist(struct mount *, int);
 #endif
 
 /*
@@ -203,7 +203,7 @@ syncer_thread(void *arg)
                /*
                 * Do soft update processing.
                 */
-               softdep_process_worklist(NULL);
+               softdep_process_worklist(NULL, 0);
 #endif
 
                /*
Index: sys/ufs/ffs/ffs_extern.h
===================================================================
RCS file: /cvs/src/sys/ufs/ffs/ffs_extern.h,v
retrieving revision 1.45
diff -u -p -u -r1.45 ffs_extern.h
--- sys/ufs/ffs/ffs_extern.h    20 Jan 2020 23:21:56 -0000      1.45
+++ sys/ufs/ffs/ffs_extern.h    3 Mar 2020 14:51:15 -0000
@@ -168,10 +168,10 @@ struct vop_vfree_args;
 struct vop_fsync_args;
 
 void  softdep_initialize(void);
-int   softdep_process_worklist(struct mount *);
+int   softdep_process_worklist(struct mount *, int);
 int   softdep_mount(struct vnode *, struct mount *, struct fs *,
           struct ucred *);
-int   softdep_flushworklist(struct mount *, int *, struct proc *);
+int   softdep_flushworklist(struct mount *, int *, struct proc *, int);
 int   softdep_flushfiles(struct mount *, int, struct proc *);
 void  softdep_update_inodeblock(struct inode *, struct buf *, int);
 void  softdep_load_inodeblock(struct inode *);
Index: sys/ufs/ffs/ffs_softdep.c
===================================================================
RCS file: /cvs/src/sys/ufs/ffs/ffs_softdep.c,v
retrieving revision 1.148
diff -u -p -u -r1.148 ffs_softdep.c
--- sys/ufs/ffs/ffs_softdep.c   4 Feb 2020 04:09:11 -0000       1.148
+++ sys/ufs/ffs/ffs_softdep.c   3 Mar 2020 14:51:15 -0000
@@ -163,7 +163,7 @@ STATIC      int inodedep_lookup(struct fs *, 
 STATIC int pagedep_lookup(struct inode *, daddr_t, int, struct pagedep **);
 STATIC void pause_timer(void *);
 STATIC int request_cleanup(int, int);
-STATIC int process_worklist_item(struct mount *, int);
+STATIC int process_worklist_item(int);
 STATIC void add_to_worklist(struct worklist *);
 
 /*
@@ -588,11 +588,11 @@ add_to_worklist(struct worklist *wk)
  * until all the old ones have been purged from the dependency lists.
  */
 int 
-softdep_process_worklist(struct mount *matchmnt)
+softdep_process_worklist(struct mount *matchmnt, int full)
 {
        struct proc *p = CURPROC;
-       int matchcnt, loopcount;
-       struct timeval starttime;
+       int cnt, matchcnt, loopcount;
+       time_t starttime;
 
        /*
         * First process any items on the delayed-free queue.
@@ -637,9 +637,12 @@ softdep_process_worklist(struct mount *m
                wakeup_one(&proc_waiting);
        }
        loopcount = 1;
-       getmicrouptime(&starttime);
+       starttime = time_second;
        while (num_on_worklist > 0) {
-               matchcnt += process_worklist_item(matchmnt, LK_NOWAIT);
+               cnt = process_worklist_item(full ? 0 : LK_NOWAIT);
+               if (cnt == 0)
+                       break;
+               matchcnt += cnt;
 
                /*
                 * If a umount operation wants to run the worklist
@@ -676,17 +679,8 @@ softdep_process_worklist(struct mount *m
                 * second. Otherwise the other syncer tasks may get
                 * excessively backlogged.
                 */
-               {
-                       struct timeval diff;
-                       struct timeval tv;
-
-                       getmicrouptime(&tv);
-                       timersub(&tv, &starttime, &diff);
-                       if (diff.tv_sec != 0 && matchmnt == NULL) {
-                               matchcnt = -1;
-                               break;
-                       }
-               }
+               if (!full && starttime != time_second)
+                       break;
 
                /*
                 * Process any new items on the delayed-free queue.
@@ -707,7 +701,7 @@ softdep_process_worklist(struct mount *m
  * Process one item on the worklist.
  */
 STATIC int
-process_worklist_item(struct mount *matchmnt, int flags)
+process_worklist_item(int flags)
 {
        struct worklist *wk, *wkend;
        struct dirrem *dirrem;
@@ -761,8 +755,7 @@ process_worklist_item(struct mount *matc
                        panic("%s: dirrem on suspended filesystem",
                                "process_worklist_item");
 #endif
-               if (mp == matchmnt)
-                       matchcnt += 1;
+               matchcnt += 1;
                handle_workitem_remove(WK_DIRREM(wk));
                break;
 
@@ -774,8 +767,7 @@ process_worklist_item(struct mount *matc
                        panic("%s: freeblks on suspended filesystem",
                                "process_worklist_item");
 #endif
-               if (mp == matchmnt)
-                       matchcnt += 1;
+               matchcnt += 1;
                handle_workitem_freeblocks(WK_FREEBLKS(wk));
                break;
 
@@ -787,8 +779,7 @@ process_worklist_item(struct mount *matc
                        panic("%s: freefrag on suspended filesystem",
                                "process_worklist_item");
 #endif
-               if (mp == matchmnt)
-                       matchcnt += 1;
+               matchcnt += 1;
                handle_workitem_freefrag(WK_FREEFRAG(wk));
                break;
 
@@ -800,8 +791,7 @@ process_worklist_item(struct mount *matc
                        panic("%s: freefile on suspended filesystem",
                                "process_worklist_item");
 #endif
-               if (mp == matchmnt)
-                       matchcnt += 1;
+               matchcnt += 1;
                handle_workitem_freefile(WK_FREEFILE(wk));
                break;
 
@@ -840,7 +830,7 @@ softdep_move_dependencies(struct buf *ol
  * Purge the work list of all items associated with a particular mount point.
  */
 int
-softdep_flushworklist(struct mount *oldmnt, int *countp, struct proc *p)
+softdep_flushworklist(struct mount *oldmnt, int *countp, struct proc *p, int 
full)
 {
        struct vnode *devvp;
        int count, error = 0;
@@ -862,7 +852,7 @@ softdep_flushworklist(struct mount *oldm
         */
        *countp = 0;
        devvp = VFSTOUFS(oldmnt)->um_devvp;
-       while ((count = softdep_process_worklist(oldmnt)) > 0) {
+       while ((count = softdep_process_worklist(oldmnt, full)) > 0) {
                *countp += count;
                vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
                error = VOP_FSYNC(devvp, p->p_ucred, MNT_WAIT, p);
@@ -897,7 +887,7 @@ softdep_flushfiles(struct mount *oldmnt,
                 */
                if ((error = ffs_flushfiles(oldmnt, flags, p)) != 0)
                        break;
-               if ((error = softdep_flushworklist(oldmnt, &count, p)) != 0 ||
+               if ((error = softdep_flushworklist(oldmnt, &count, p, 1)) != 0 
||
                    count == 0)
                        break;
        }
@@ -5253,8 +5243,8 @@ request_cleanup(int resource, int islock
                atomic_setbits_int(&p->p_flag, P_SOFTDEP);
                if (islocked)
                        FREE_LOCK(&lk);
-               process_worklist_item(NULL, LK_NOWAIT);
-               process_worklist_item(NULL, LK_NOWAIT);
+               process_worklist_item(LK_NOWAIT);
+               process_worklist_item(LK_NOWAIT);
                atomic_clearbits_int(&p->p_flag, P_SOFTDEP);
                stat_worklist_push += 2;
                if (islocked)
Index: sys/ufs/ffs/ffs_softdep_stub.c
===================================================================
RCS file: /cvs/src/sys/ufs/ffs/ffs_softdep_stub.c,v
retrieving revision 1.18
diff -u -p -u -r1.18 ffs_softdep_stub.c
--- sys/ufs/ffs/ffs_softdep_stub.c      11 Jun 2013 16:42:18 -0000      1.18
+++ sys/ufs/ffs/ffs_softdep_stub.c      3 Mar 2020 14:51:15 -0000
@@ -168,7 +168,7 @@ softdep_fsync_mountdev(struct vnode *vp,
 }
 
 int
-softdep_flushworklist(struct mount *oldmnt, int *countp, struct proc *p)
+softdep_flushworklist(struct mount *oldmnt, int *countp, struct proc *p, int)
 {
        *countp = 0;
        return (0);
Index: sys/ufs/ffs/ffs_vfsops.c
===================================================================
RCS file: /cvs/src/sys/ufs/ffs/ffs_vfsops.c,v
retrieving revision 1.183
diff -u -p -u -r1.183 ffs_vfsops.c
--- sys/ufs/ffs/ffs_vfsops.c    21 Feb 2020 11:11:15 -0000      1.183
+++ sys/ufs/ffs/ffs_vfsops.c    3 Mar 2020 14:51:15 -0000
@@ -1257,7 +1257,7 @@ ffs_sync(struct mount *mp, int waitfor, 
         * Force stale file system control information to be flushed.
         */
        if ((ump->um_mountp->mnt_flag & MNT_SOFTDEP) && waitfor == MNT_WAIT) {
-               if ((error = softdep_flushworklist(ump->um_mountp, &count, p)))
+               if ((error = softdep_flushworklist(ump->um_mountp, &count, p, 
0)))
                        allerror = error;
                /* Flushed work items may create new vnodes to clean */
                if (count) 

Reply via email to