Author: mckusick
Date: Tue Apr  5 21:26:05 2011
New Revision: 220374
URL: http://svn.freebsd.org/changeset/base/220374

Log:
  Be far more persistent in reclaiming blocks and inodes before giving
  up and declaring a filesystem out of space. Especially necessary when
  running on a small filesystem. With this improvement, it should be
  possible to use soft updates on a small root filesystem.
  
  Kudos to: Peter Holm
  Testing by: Peter Holm
  MFC: 2 weeks

Modified:
  head/sys/ufs/ffs/ffs_alloc.c
  head/sys/ufs/ffs/ffs_extern.h
  head/sys/ufs/ffs/ffs_softdep.c

Modified: head/sys/ufs/ffs/ffs_alloc.c
==============================================================================
--- head/sys/ufs/ffs/ffs_alloc.c        Tue Apr  5 20:23:59 2011        (r220373)
+++ head/sys/ufs/ffs/ffs_alloc.c        Tue Apr  5 21:26:05 2011        (r220374)
@@ -217,9 +217,9 @@ nospace:
        (void) chkdq(ip, -btodb(size), cred, FORCE);
        UFS_LOCK(ump);
 #endif
-       if (fs->fs_pendingblocks > 0 && reclaimed == 0) {
+       if (reclaimed == 0) {
                reclaimed = 1;
-               softdep_request_cleanup(fs, ITOV(ip), FLUSH_BLOCKS_WAIT);
+               softdep_request_cleanup(fs, ITOV(ip), cred, FLUSH_BLOCKS_WAIT);
                goto retry;
        }
        UFS_UNLOCK(ump);
@@ -418,9 +418,9 @@ nospace:
        /*
         * no space available
         */
-       if (fs->fs_pendingblocks > 0 && reclaimed == 0) {
+       if (reclaimed == 0) {
                reclaimed = 1;
-               softdep_request_cleanup(fs, vp, FLUSH_BLOCKS_WAIT);
+               softdep_request_cleanup(fs, vp, cred, FLUSH_BLOCKS_WAIT);
                UFS_UNLOCK(ump);
                if (bp) {
                        brelse(bp);
@@ -1023,7 +1023,7 @@ dup_alloc:
 noinodes:
        if (fs->fs_pendinginodes > 0 && reclaimed == 0) {
                reclaimed = 1;
-               softdep_request_cleanup(fs, pvp, FLUSH_INODES_WAIT);
+               softdep_request_cleanup(fs, pvp, cred, FLUSH_INODES_WAIT);
                goto retry;
        }
        UFS_UNLOCK(ump);

Modified: head/sys/ufs/ffs/ffs_extern.h
==============================================================================
--- head/sys/ufs/ffs/ffs_extern.h       Tue Apr  5 20:23:59 2011        (r220373)
+++ head/sys/ufs/ffs/ffs_extern.h       Tue Apr  5 21:26:05 2011        (r220374)
@@ -120,7 +120,8 @@ int softdep_flushfiles(struct mount *, i
 void   softdep_update_inodeblock(struct inode *, struct buf *, int);
 void   softdep_load_inodeblock(struct inode *);
 void   softdep_freefile(struct vnode *, ino_t, int);
-int    softdep_request_cleanup(struct fs *, struct vnode *, int);
+int    softdep_request_cleanup(struct fs *, struct vnode *,
+           struct ucred *, int);
 void   softdep_setup_freeblocks(struct inode *, off_t, int);
 void   softdep_setup_inomapdep(struct buf *, struct inode *, ino_t);
 void   softdep_setup_blkmapdep(struct buf *, struct mount *, ufs2_daddr_t,

Modified: head/sys/ufs/ffs/ffs_softdep.c
==============================================================================
--- head/sys/ufs/ffs/ffs_softdep.c      Tue Apr  5 20:23:59 2011        (r220373)
+++ head/sys/ufs/ffs/ffs_softdep.c      Tue Apr  5 21:26:05 2011        (r220374)
@@ -64,6 +64,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/mount.h>
 #include <sys/mutex.h>
 #include <sys/namei.h>
+#include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/stat.h>
 #include <sys/sysctl.h>
@@ -514,9 +515,10 @@ softdep_releasefile(ip)
 }
 
 int
-softdep_request_cleanup(fs, vp, resource)
+softdep_request_cleanup(fs, vp, cred, resource)
        struct fs *fs;
        struct vnode *vp;
+       struct ucred *cred;
        int resource;
 {
 
@@ -1131,6 +1133,11 @@ static int stat_jwait_filepage;  /* Times
 static int stat_jwait_freeblks;        /* Times blocked in jwait() for freeblks. */
 static int stat_jwait_inode;   /* Times blocked in jwait() for inodes. */
 static int stat_jwait_newblk;  /* Times blocked in jwait() for newblks. */
+static int stat_cleanup_high_delay; /* Maximum cleanup delay (in ticks) */
+static int stat_cleanup_blkrequests; /* Number of block cleanup requests */
+static int stat_cleanup_inorequests; /* Number of inode cleanup requests */
+static int stat_cleanup_retries; /* Number of cleanups that needed to flush */
+static int stat_cleanup_failures; /* Number of cleanup requests that failed */
 
 SYSCTL_INT(_debug_softdep, OID_AUTO, max_softdeps, CTLFLAG_RW,
     &max_softdeps, 0, "");
@@ -1176,6 +1183,16 @@ SYSCTL_INT(_debug_softdep, OID_AUTO, jwa
     &stat_jwait_inode, 0, "");
 SYSCTL_INT(_debug_softdep, OID_AUTO, jwait_newblk, CTLFLAG_RW,
     &stat_jwait_newblk, 0, "");
+SYSCTL_INT(_debug_softdep, OID_AUTO, cleanup_blkrequests, CTLFLAG_RW,
+    &stat_cleanup_blkrequests, 0, "");
+SYSCTL_INT(_debug_softdep, OID_AUTO, cleanup_inorequests, CTLFLAG_RW,
+    &stat_cleanup_inorequests, 0, "");
+SYSCTL_INT(_debug_softdep, OID_AUTO, cleanup_high_delay, CTLFLAG_RW,
+    &stat_cleanup_high_delay, 0, "");
+SYSCTL_INT(_debug_softdep, OID_AUTO, cleanup_retries, CTLFLAG_RW,
+    &stat_cleanup_retries, 0, "");
+SYSCTL_INT(_debug_softdep, OID_AUTO, cleanup_failures, CTLFLAG_RW,
+    &stat_cleanup_failures, 0, "");
 
 SYSCTL_DECL(_vfs_ffs);
 
@@ -10879,29 +10896,29 @@ softdep_slowdown(vp)
  * Because this process holds inodes locked, we cannot handle any remove
  * requests that might block on a locked inode as that could lead to
  * deadlock. If the worklist yields none of the requested resource,
- * encourage the syncer daemon to help us. In no event will we try for
- * longer than tickdelay seconds.
+ * start syncing out vnodes to free up the needed space.
  */
 int
-softdep_request_cleanup(fs, vp, resource)
+softdep_request_cleanup(fs, vp, cred, resource)
        struct fs *fs;
        struct vnode *vp;
+       struct ucred *cred;
        int resource;
 {
        struct ufsmount *ump;
+       struct mount *mp;
+       struct vnode *lvp, *mvp;
        long starttime;
        ufs2_daddr_t needed;
        int error;
 
+       mp = vp->v_mount;
        ump = VTOI(vp)->i_ump;
        mtx_assert(UFS_MTX(ump), MA_OWNED);
        if (resource == FLUSH_BLOCKS_WAIT)
-               needed = fs->fs_cstotal.cs_nbfree + fs->fs_contigsumsize;
-       else if (resource == FLUSH_INODES_WAIT)
-               needed = fs->fs_cstotal.cs_nifree + 2;
+               stat_cleanup_blkrequests += 1;
        else
-               return (0);
-       starttime = time_second + tickdelay;
+               stat_cleanup_inorequests += 1;
        /*
         * If we are being called because of a process doing a
         * copy-on-write, then it is not safe to update the vnode
@@ -10914,12 +10931,56 @@ softdep_request_cleanup(fs, vp, resource
                if (error != 0)
                        return (0);
        }
-       while ((resource == FLUSH_BLOCKS_WAIT && fs->fs_pendingblocks > 0 &&
+       /*
+        * If we are in need of resources, consider pausing for
+        * tickdelay to give ourselves some breathing room.
+        */
+       UFS_UNLOCK(ump);
+       ACQUIRE_LOCK(&lk);
+       request_cleanup(UFSTOVFS(ump), resource);
+       FREE_LOCK(&lk);
+       UFS_LOCK(ump);
+       /*
+        * Now clean up at least as many resources as we will need.
+        *
+        * When requested to clean up inodes, the number that are needed
+        * is set by the number of simultaneous writers (mnt_writeopcount)
+        * plus a bit of slop (2) in case some more writers show up while
+        * we are cleaning.
+        *
+        * When requested to free up space, the amount of space that
+        * we need is enough blocks to allocate a full-sized segment
+        * (fs_contigsumsize). The number of such segments that will
+        * be needed is set by the number of simultaneous writers
+        * (mnt_writeopcount) plus a bit of slop (2) in case some more
+        * writers show up while we are cleaning.
+        *
+        * Additionally, if we are unprivileged and allocating space,
+        * we need to ensure that we clean up enough blocks to get the
+        * needed number of blocks over the threshold of the minimum
+        * number of blocks required to be kept free by the filesystem
+        * (fs_minfree).
+        */
+       if (resource == FLUSH_INODES_WAIT) {
+               needed = vp->v_mount->mnt_writeopcount + 2;
+       } else if (resource == FLUSH_BLOCKS_WAIT) {
+               needed = (vp->v_mount->mnt_writeopcount + 2) *
+                   fs->fs_contigsumsize;
+               if (priv_check_cred(cred, PRIV_VFS_BLOCKRESERVE, 0))
+                       needed += fragstoblks(fs,
+                           roundup((fs->fs_dsize * fs->fs_minfree / 100) -
+                           fs->fs_cstotal.cs_nffree, fs->fs_frag));
+       } else {
+               printf("softdep_request_cleanup: Unknown resource type %d\n",
+                   resource);
+               return (0);
+       }
+       starttime = time_second;
+retry:
+       while ((resource == FLUSH_BLOCKS_WAIT && ump->softdep_on_worklist > 0 &&
                fs->fs_cstotal.cs_nbfree <= needed) ||
               (resource == FLUSH_INODES_WAIT && fs->fs_pendinginodes > 0 &&
                fs->fs_cstotal.cs_nifree <= needed)) {
-               if (time_second > starttime)
-                       return (0);
                UFS_UNLOCK(ump);
                ACQUIRE_LOCK(&lk);
                process_removes(vp);
@@ -10930,10 +10991,60 @@ softdep_request_cleanup(fs, vp, resource
                        UFS_LOCK(ump);
                        continue;
                }
-               request_cleanup(UFSTOVFS(ump), resource);
                FREE_LOCK(&lk);
                UFS_LOCK(ump);
        }
+       /*
+        * If we still need resources and there are no more worklist
+        * entries to process to obtain them, we have to start flushing
+        * the dirty vnodes to force the release of additional requests
+        * to the worklist that we can then process to reap additional
+        * resources. We walk the vnodes associated with the mount point
+        * until we get the needed worklist requests that we can reap.
+        */
+       if ((resource == FLUSH_BLOCKS_WAIT && 
+            fs->fs_cstotal.cs_nbfree <= needed) ||
+           (resource == FLUSH_INODES_WAIT && fs->fs_pendinginodes > 0 &&
+            fs->fs_cstotal.cs_nifree <= needed)) {
+               UFS_UNLOCK(ump);
+               MNT_ILOCK(mp);
+               MNT_VNODE_FOREACH(lvp, mp, mvp) {
+                       UFS_LOCK(ump);
+                       if (ump->softdep_on_worklist > 0) {
+                               UFS_UNLOCK(ump);
+                               MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
+                               MNT_IUNLOCK(mp);
+                               UFS_LOCK(ump);
+                               stat_cleanup_retries += 1;
+                               goto retry;
+                       }
+                       UFS_UNLOCK(ump);
+                       VI_LOCK(lvp);
+                       if (TAILQ_FIRST(&lvp->v_bufobj.bo_dirty.bv_hd) == 0 ||
+                           VOP_ISLOCKED(lvp) != 0) {
+                               VI_UNLOCK(lvp);
+                               continue;
+                       }
+                       MNT_IUNLOCK(mp);
+                       if (vget(lvp, LK_EXCLUSIVE | LK_INTERLOCK, curthread)) {
+                               MNT_ILOCK(mp);
+                               continue;
+                       }
+                       if (lvp->v_vflag & VV_NOSYNC) { /* unlinked */
+                               vput(lvp);
+                               MNT_ILOCK(mp);
+                               continue;
+                       }
+                       (void) ffs_syncvnode(lvp, MNT_WAIT);
+                       vput(lvp);
+                       MNT_ILOCK(mp);
+               }
+               MNT_IUNLOCK(mp);
+               stat_cleanup_failures += 1;
+               UFS_LOCK(ump);
+       }
+       if (time_second - starttime > stat_cleanup_high_delay)
+               stat_cleanup_high_delay = time_second - starttime;
        return (1);
 }
 
_______________________________________________
svn-src-all@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscribe@freebsd.org"

Reply via email to