On Tue, 03 Mar 2020 10:20:04 +0100, Mark Patruck wrote:
> After ~3 days with the system up and running, the crash after doing
> a "reboot" looks different. Now it's in handle_workitem_freeblocks(),
> according to objdump
The problem is that removed files were not actually getting removed
since process_worklist_item() was returning 0 even when it performed
work, causing the loop to exit early. You can see this by running
df after removing files--the available blocks are returned to the
system very slowly if at all.
I've been testing the following diff which seems to be stable,
though it contains a couple different changes which should probably
be split up.
1) Return the count in process_worklist_item() regardless of the
value of matchmnt (which can be removed from the args).
2) Don't pass LK_NOWAIT to process_worklist_item() when unmounting
the file system.
3) Simpler starttime check in softdep_process_worklist() that
is consistent with the syncer. The check is now skipped if we
are unmounting.
- todd
Index: sys/kern/vfs_sync.c
===================================================================
RCS file: /cvs/src/sys/kern/vfs_sync.c,v
retrieving revision 1.63
diff -u -p -u -r1.63 vfs_sync.c
--- sys/kern/vfs_sync.c 20 Jan 2020 23:21:56 -0000 1.63
+++ sys/kern/vfs_sync.c 3 Mar 2020 14:51:15 -0000
@@ -53,7 +53,7 @@
#include <sys/sched.h>
#ifdef FFS_SOFTUPDATES
-int softdep_process_worklist(struct mount *);
+int softdep_process_worklist(struct mount *, int);
#endif
/*
@@ -203,7 +203,7 @@ syncer_thread(void *arg)
/*
* Do soft update processing.
*/
- softdep_process_worklist(NULL);
+ softdep_process_worklist(NULL, 0);
#endif
/*
Index: sys/ufs/ffs/ffs_extern.h
===================================================================
RCS file: /cvs/src/sys/ufs/ffs/ffs_extern.h,v
retrieving revision 1.45
diff -u -p -u -r1.45 ffs_extern.h
--- sys/ufs/ffs/ffs_extern.h 20 Jan 2020 23:21:56 -0000 1.45
+++ sys/ufs/ffs/ffs_extern.h 3 Mar 2020 14:51:15 -0000
@@ -168,10 +168,10 @@ struct vop_vfree_args;
struct vop_fsync_args;
void softdep_initialize(void);
-int softdep_process_worklist(struct mount *);
+int softdep_process_worklist(struct mount *, int);
int softdep_mount(struct vnode *, struct mount *, struct fs *,
struct ucred *);
-int softdep_flushworklist(struct mount *, int *, struct proc *);
+int softdep_flushworklist(struct mount *, int *, struct proc *, int);
int softdep_flushfiles(struct mount *, int, struct proc *);
void softdep_update_inodeblock(struct inode *, struct buf *, int);
void softdep_load_inodeblock(struct inode *);
Index: sys/ufs/ffs/ffs_softdep.c
===================================================================
RCS file: /cvs/src/sys/ufs/ffs/ffs_softdep.c,v
retrieving revision 1.148
diff -u -p -u -r1.148 ffs_softdep.c
--- sys/ufs/ffs/ffs_softdep.c 4 Feb 2020 04:09:11 -0000 1.148
+++ sys/ufs/ffs/ffs_softdep.c 3 Mar 2020 14:51:15 -0000
@@ -163,7 +163,7 @@ STATIC int inodedep_lookup(struct fs *,
STATIC int pagedep_lookup(struct inode *, daddr_t, int, struct pagedep **);
STATIC void pause_timer(void *);
STATIC int request_cleanup(int, int);
-STATIC int process_worklist_item(struct mount *, int);
+STATIC int process_worklist_item(int);
STATIC void add_to_worklist(struct worklist *);
/*
@@ -588,11 +588,11 @@ add_to_worklist(struct worklist *wk)
* until all the old ones have been purged from the dependency lists.
*/
int
-softdep_process_worklist(struct mount *matchmnt)
+softdep_process_worklist(struct mount *matchmnt, int full)
{
struct proc *p = CURPROC;
- int matchcnt, loopcount;
- struct timeval starttime;
+ int cnt, matchcnt, loopcount;
+ time_t starttime;
/*
* First process any items on the delayed-free queue.
@@ -637,9 +637,12 @@ softdep_process_worklist(struct mount *m
wakeup_one(&proc_waiting);
}
loopcount = 1;
- getmicrouptime(&starttime);
+ starttime = time_second;
while (num_on_worklist > 0) {
- matchcnt += process_worklist_item(matchmnt, LK_NOWAIT);
+ cnt = process_worklist_item(full ? 0 : LK_NOWAIT);
+ if (cnt == 0)
+ break;
+ matchcnt += cnt;
/*
* If a umount operation wants to run the worklist
@@ -676,17 +679,8 @@ softdep_process_worklist(struct mount *m
* second. Otherwise the other syncer tasks may get
* excessively backlogged.
*/
- {
- struct timeval diff;
- struct timeval tv;
-
- getmicrouptime(&tv);
- timersub(&tv, &starttime, &diff);
- if (diff.tv_sec != 0 && matchmnt == NULL) {
- matchcnt = -1;
- break;
- }
- }
+ if (!full && starttime != time_second)
+ break;
/*
* Process any new items on the delayed-free queue.
@@ -707,7 +701,7 @@ softdep_process_worklist(struct mount *m
* Process one item on the worklist.
*/
STATIC int
-process_worklist_item(struct mount *matchmnt, int flags)
+process_worklist_item(int flags)
{
struct worklist *wk, *wkend;
struct dirrem *dirrem;
@@ -761,8 +755,7 @@ process_worklist_item(struct mount *matc
panic("%s: dirrem on suspended filesystem",
"process_worklist_item");
#endif
- if (mp == matchmnt)
- matchcnt += 1;
+ matchcnt += 1;
handle_workitem_remove(WK_DIRREM(wk));
break;
@@ -774,8 +767,7 @@ process_worklist_item(struct mount *matc
panic("%s: freeblks on suspended filesystem",
"process_worklist_item");
#endif
- if (mp == matchmnt)
- matchcnt += 1;
+ matchcnt += 1;
handle_workitem_freeblocks(WK_FREEBLKS(wk));
break;
@@ -787,8 +779,7 @@ process_worklist_item(struct mount *matc
panic("%s: freefrag on suspended filesystem",
"process_worklist_item");
#endif
- if (mp == matchmnt)
- matchcnt += 1;
+ matchcnt += 1;
handle_workitem_freefrag(WK_FREEFRAG(wk));
break;
@@ -800,8 +791,7 @@ process_worklist_item(struct mount *matc
panic("%s: freefile on suspended filesystem",
"process_worklist_item");
#endif
- if (mp == matchmnt)
- matchcnt += 1;
+ matchcnt += 1;
handle_workitem_freefile(WK_FREEFILE(wk));
break;
@@ -840,7 +830,7 @@ softdep_move_dependencies(struct buf *ol
* Purge the work list of all items associated with a particular mount point.
*/
int
-softdep_flushworklist(struct mount *oldmnt, int *countp, struct proc *p)
+softdep_flushworklist(struct mount *oldmnt, int *countp, struct proc *p, int
full)
{
struct vnode *devvp;
int count, error = 0;
@@ -862,7 +852,7 @@ softdep_flushworklist(struct mount *oldm
*/
*countp = 0;
devvp = VFSTOUFS(oldmnt)->um_devvp;
- while ((count = softdep_process_worklist(oldmnt)) > 0) {
+ while ((count = softdep_process_worklist(oldmnt, full)) > 0) {
*countp += count;
vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
error = VOP_FSYNC(devvp, p->p_ucred, MNT_WAIT, p);
@@ -897,7 +887,7 @@ softdep_flushfiles(struct mount *oldmnt,
*/
if ((error = ffs_flushfiles(oldmnt, flags, p)) != 0)
break;
- if ((error = softdep_flushworklist(oldmnt, &count, p)) != 0 ||
+ if ((error = softdep_flushworklist(oldmnt, &count, p, 1)) != 0
||
count == 0)
break;
}
@@ -5253,8 +5243,8 @@ request_cleanup(int resource, int islock
atomic_setbits_int(&p->p_flag, P_SOFTDEP);
if (islocked)
FREE_LOCK(&lk);
- process_worklist_item(NULL, LK_NOWAIT);
- process_worklist_item(NULL, LK_NOWAIT);
+ process_worklist_item(LK_NOWAIT);
+ process_worklist_item(LK_NOWAIT);
atomic_clearbits_int(&p->p_flag, P_SOFTDEP);
stat_worklist_push += 2;
if (islocked)
Index: sys/ufs/ffs/ffs_softdep_stub.c
===================================================================
RCS file: /cvs/src/sys/ufs/ffs/ffs_softdep_stub.c,v
retrieving revision 1.18
diff -u -p -u -r1.18 ffs_softdep_stub.c
--- sys/ufs/ffs/ffs_softdep_stub.c 11 Jun 2013 16:42:18 -0000 1.18
+++ sys/ufs/ffs/ffs_softdep_stub.c 3 Mar 2020 14:51:15 -0000
@@ -168,7 +168,7 @@ softdep_fsync_mountdev(struct vnode *vp,
}
int
-softdep_flushworklist(struct mount *oldmnt, int *countp, struct proc *p)
+softdep_flushworklist(struct mount *oldmnt, int *countp, struct proc *p, int)
{
*countp = 0;
return (0);
Index: sys/ufs/ffs/ffs_vfsops.c
===================================================================
RCS file: /cvs/src/sys/ufs/ffs/ffs_vfsops.c,v
retrieving revision 1.183
diff -u -p -u -r1.183 ffs_vfsops.c
--- sys/ufs/ffs/ffs_vfsops.c 21 Feb 2020 11:11:15 -0000 1.183
+++ sys/ufs/ffs/ffs_vfsops.c 3 Mar 2020 14:51:15 -0000
@@ -1257,7 +1257,7 @@ ffs_sync(struct mount *mp, int waitfor,
* Force stale file system control information to be flushed.
*/
if ((ump->um_mountp->mnt_flag & MNT_SOFTDEP) && waitfor == MNT_WAIT) {
- if ((error = softdep_flushworklist(ump->um_mountp, &count, p)))
+ if ((error = softdep_flushworklist(ump->um_mountp, &count, p,
0)))
allerror = error;
/* Flushed work items may create new vnodes to clean */
if (count)