Author: jeff
Date: Fri Jun 10 22:48:35 2011
New Revision: 222958
URL: http://svn.freebsd.org/changeset/base/222958

Log:
  Implement fully asynchronous partial truncation with softupdates journaling
  to resolve errors which can cause corruption on recovery with the old
  synchronous mechanism.
  
   - Append partial truncation freework structures to indirdeps while
     truncation is proceeding.  These prevent new block pointers from
     becoming valid until truncation completes and serialize truncations.
   - On completion of a partial truncate, journal work waits for zeroed
     pointers to hit indirects.
   - softdep_journal_freeblocks() handles last frag allocation and last
     block zeroing.
   - vtruncbuf/ffs_pages_remove moved into softdep_*_freeblocks() so it
     is only implemented in one place.
   - Block allocation failure handling moved up one level so it does not
     proceed with buf locks held.  This permits us to do more extensive
     reclaims when filesystem space is exhausted.
   - softdep_sync_metadata() is broken into two parts.  The first executes
     once at the start of ffs_syncvnode() and flushes truncations and
     inode dependencies.  The second is called on each locked buf.  This
     eliminates excessive looping and rollbacks.
   - Improve the mechanism in process_worklist_item() that handles
     acquiring vnode locks for handle_workitem_remove() so that it works
     more generally and does not loop excessively over the same worklist
     items on each call.
   - Don't corrupt directories by zeroing the tail in fsck.  This is only
     done for regular files.
   - Push a fsync complete record for files that need it so the checker
     knows a truncation in the journal is no longer valid.
  
  Discussed with:       mckusick, kib (ffs_pages_remove and ffs_truncate parts)
  Tested by:    pho

Modified:
  head/sbin/fsck_ffs/suj.c
  head/sys/sys/vnode.h
  head/sys/ufs/ffs/ffs_alloc.c
  head/sys/ufs/ffs/ffs_balloc.c
  head/sys/ufs/ffs/ffs_extern.h
  head/sys/ufs/ffs/ffs_inode.c
  head/sys/ufs/ffs/ffs_softdep.c
  head/sys/ufs/ffs/ffs_vfsops.c
  head/sys/ufs/ffs/ffs_vnops.c
  head/sys/ufs/ffs/fs.h
  head/sys/ufs/ffs/softdep.h
  head/sys/ufs/ufs/inode.h
  head/sys/ufs/ufs/ufsmount.h

Modified: head/sbin/fsck_ffs/suj.c
==============================================================================
--- head/sbin/fsck_ffs/suj.c    Fri Jun 10 22:42:00 2011        (r222957)
+++ head/sbin/fsck_ffs/suj.c    Fri Jun 10 22:48:35 2011        (r222958)
@@ -1604,7 +1604,7 @@ ino_trunc(ino_t ino, off_t size)
         * uninitialized space later.
         */
        off = blkoff(fs, size);
-       if (off) {
+       if (off && DIP(ip, di_mode) != IFDIR) {
                uint8_t *buf;
                long clrsize;
 
@@ -1775,13 +1775,18 @@ cg_trunc(struct suj_cg *sc)
        struct suj_ino *sino;
        int i;
 
-       for (i = 0; i < SUJ_HASHSIZE; i++)
-               LIST_FOREACH(sino, &sc->sc_inohash[i], si_next)
+       for (i = 0; i < SUJ_HASHSIZE; i++) {
+               LIST_FOREACH(sino, &sc->sc_inohash[i], si_next) {
                        if (sino->si_trunc) {
                                ino_trunc(sino->si_ino,
                                    sino->si_trunc->jt_size);
+                               sino->si_blkadj = 0;
                                sino->si_trunc = NULL;
                        }
+                       if (sino->si_blkadj)
+                               ino_adjblks(sino);
+               }
+       }
 }
 
 /*
@@ -1791,7 +1796,6 @@ cg_trunc(struct suj_cg *sc)
 static void
 cg_check_blk(struct suj_cg *sc)
 {
-       struct suj_ino *sino;
        struct suj_blk *sblk;
        int i;
 
@@ -1799,15 +1803,6 @@ cg_check_blk(struct suj_cg *sc)
        for (i = 0; i < SUJ_HASHSIZE; i++)
                LIST_FOREACH(sblk, &sc->sc_blkhash[i], sb_next)
                        blk_check(sblk);
-       /*
-        * Now that we've freed blocks which are not referenced we
-        * make a second pass over all inodes to adjust their block
-        * counts.
-        */
-       for (i = 0; i < SUJ_HASHSIZE; i++)
-               LIST_FOREACH(sino, &sc->sc_inohash[i], si_next)
-                       if (sino->si_blkadj)
-                               ino_adjblks(sino);
 }
 
 /*
@@ -1961,14 +1956,7 @@ ino_append(union jrec *rec)
                    "parent %d, diroff %jd\n",
                    refrec->jr_op, refrec->jr_ino, refrec->jr_nlink,
                    refrec->jr_parent, refrec->jr_diroff);
-       /*
-        * Lookup the ino and clear truncate if one is found.  Partial
-        * truncates are always done synchronously so if we discover
-        * an operation that requires a lock the truncation has completed
-        * and can be discarded.
-        */
        sino = ino_lookup(((struct jrefrec *)rec)->jr_ino, 1);
-       sino->si_trunc = NULL;
        sino->si_hasrecs = 1;
        srec = errmalloc(sizeof(*srec));
        srec->sr_rec = rec;
@@ -2174,9 +2162,7 @@ blk_build(struct jblkrec *blkrec)
        struct suj_rec *srec;
        struct suj_blk *sblk;
        struct jblkrec *blkrn;
-       struct suj_ino *sino;
        ufs2_daddr_t blk;
-       off_t foff;
        int frag;
 
        if (debug)
@@ -2185,17 +2171,6 @@ blk_build(struct jblkrec *blkrec)
                    blkrec->jb_op, blkrec->jb_blkno, blkrec->jb_frags,
                    blkrec->jb_oldfrags, blkrec->jb_ino, blkrec->jb_lbn);
 
-       /*
-        * Look up the inode and clear the truncate if any lbns after the
-        * truncate lbn are freed or allocated.
-        */
-       sino = ino_lookup(blkrec->jb_ino, 0);
-       if (sino && sino->si_trunc) {
-               foff = lblktosize(fs, blkrec->jb_lbn);
-               foff += lfragtosize(fs, blkrec->jb_frags);
-               if (foff > sino->si_trunc->jt_size)
-                       sino->si_trunc = NULL;
-       }
        blk = blknum(fs, blkrec->jb_blkno);
        frag = fragnum(fs, blkrec->jb_blkno);
        sblk = blk_lookup(blk, 1);
@@ -2242,10 +2217,15 @@ ino_build_trunc(struct jtrncrec *rec)
        struct suj_ino *sino;
 
        if (debug)
-               printf("ino_build_trunc: ino %d, size %jd\n",
-                   rec->jt_ino, rec->jt_size);
+               printf("ino_build_trunc: op %d ino %d, size %jd\n",
+                   rec->jt_op, rec->jt_ino, rec->jt_size);
        sino = ino_lookup(rec->jt_ino, 1);
-       sino->si_trunc = rec;
+       if (rec->jt_op == JOP_SYNC) {
+               sino->si_trunc = NULL;
+               return;
+       }
+       if (sino->si_trunc == NULL || sino->si_trunc->jt_size > rec->jt_size)
+               sino->si_trunc = rec;
 }
 
 /*

Modified: head/sys/sys/vnode.h
==============================================================================
--- head/sys/sys/vnode.h        Fri Jun 10 22:42:00 2011        (r222957)
+++ head/sys/sys/vnode.h        Fri Jun 10 22:48:35 2011        (r222958)
@@ -302,6 +302,7 @@ struct vattr {
 #define        IO_EXT          0x0400          /* operate on external attributes */
 #define        IO_NORMAL       0x0800          /* operate on regular data */
 #define        IO_NOMACCHECK   0x1000          /* MAC checks unnecessary */
+#define        IO_BUFLOCKED    0x2000          /* ffs flag; indir buf is locked */
 
 #define IO_SEQMAX      0x7F            /* seq heuristic max value */
 #define IO_SEQSHIFT    16              /* seq heuristic in upper 16 bits */

Modified: head/sys/ufs/ffs/ffs_alloc.c
==============================================================================
--- head/sys/ufs/ffs/ffs_alloc.c        Fri Jun 10 22:42:00 2011        (r222957)
+++ head/sys/ufs/ffs/ffs_alloc.c        Fri Jun 10 22:48:35 2011        (r222958)
@@ -217,7 +217,7 @@ nospace:
        (void) chkdq(ip, -btodb(size), cred, FORCE);
        UFS_LOCK(ump);
 #endif
-       if (reclaimed == 0) {
+       if (reclaimed == 0 && (flags & IO_BUFLOCKED) == 0) {
                reclaimed = 1;
                softdep_request_cleanup(fs, ITOV(ip), cred, FLUSH_BLOCKS_WAIT);
                goto retry;
@@ -418,7 +418,7 @@ nospace:
        /*
         * no space available
         */
-       if (reclaimed == 0) {
+       if (reclaimed == 0 && (flags & IO_BUFLOCKED) == 0) {
                reclaimed = 1;
                UFS_UNLOCK(ump);
                if (bp) {

Modified: head/sys/ufs/ffs/ffs_balloc.c
==============================================================================
--- head/sys/ufs/ffs/ffs_balloc.c       Fri Jun 10 22:42:00 2011        (r222957)
+++ head/sys/ufs/ffs/ffs_balloc.c       Fri Jun 10 22:48:35 2011        (r222958)
@@ -105,6 +105,7 @@ ffs_balloc_ufs1(struct vnode *vp, off_t 
        ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1];
        int unwindidx = -1;
        int saved_inbdflush;
+       int reclaimed;
 
        ip = VTOI(vp);
        dp = ip->i_din1;
@@ -112,6 +113,7 @@ ffs_balloc_ufs1(struct vnode *vp, off_t 
        ump = ip->i_ump;
        lbn = lblkno(fs, startoffset);
        size = blkoff(fs, startoffset) + size;
+       reclaimed = 0;
        if (size > fs->fs_bsize)
                panic("ffs_balloc_ufs1: blk too big");
        *bpp = NULL;
@@ -276,6 +278,7 @@ ffs_balloc_ufs1(struct vnode *vp, off_t 
        /*
         * Fetch through the indirect blocks, allocating as necessary.
         */
+retry:
        for (i = 1;;) {
                error = bread(vp,
                    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
@@ -296,8 +299,15 @@ ffs_balloc_ufs1(struct vnode *vp, off_t 
                if (pref == 0)
                        pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0);
                if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
-                   flags, cred, &newb)) != 0) {
+                   flags | IO_BUFLOCKED, cred, &newb)) != 0) {
                        brelse(bp);
+                       if (++reclaimed == 1) {
+                               UFS_LOCK(ump);
+                               softdep_request_cleanup(fs, vp, cred,
+                                   FLUSH_BLOCKS_WAIT);
+                               UFS_UNLOCK(ump);
+                               goto retry;
+                       }
                        goto fail;
                }
                nb = newb;
@@ -349,10 +359,17 @@ ffs_balloc_ufs1(struct vnode *vp, off_t 
        if (nb == 0) {
                UFS_LOCK(ump);
                pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off, &bap[0]);
-               error = ffs_alloc(ip,
-                   lbn, pref, (int)fs->fs_bsize, flags, cred, &newb);
+               error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
+                   flags | IO_BUFLOCKED, cred, &newb);
                if (error) {
                        brelse(bp);
+                       if (++reclaimed == 1) {
+                               UFS_LOCK(ump);
+                               softdep_request_cleanup(fs, vp, cred,
+                                   FLUSH_BLOCKS_WAIT);
+                               UFS_UNLOCK(ump);
+                               goto retry;
+                       }
                        goto fail;
                }
                nb = newb;
@@ -506,6 +523,7 @@ ffs_balloc_ufs2(struct vnode *vp, off_t 
        int deallocated, osize, nsize, num, i, error;
        int unwindidx = -1;
        int saved_inbdflush;
+       int reclaimed;
 
        ip = VTOI(vp);
        dp = ip->i_din2;
@@ -513,6 +531,7 @@ ffs_balloc_ufs2(struct vnode *vp, off_t 
        ump = ip->i_ump;
        lbn = lblkno(fs, startoffset);
        size = blkoff(fs, startoffset) + size;
+       reclaimed = 0;
        if (size > fs->fs_bsize)
                panic("ffs_balloc_ufs2: blk too big");
        *bpp = NULL;
@@ -787,6 +806,7 @@ ffs_balloc_ufs2(struct vnode *vp, off_t 
        /*
         * Fetch through the indirect blocks, allocating as necessary.
         */
+retry:
        for (i = 1;;) {
                error = bread(vp,
                    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
@@ -807,8 +827,15 @@ ffs_balloc_ufs2(struct vnode *vp, off_t 
                if (pref == 0)
                        pref = ffs_blkpref_ufs2(ip, lbn, 0, (ufs2_daddr_t *)0);
                if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
-                   flags, cred, &newb)) != 0) {
+                   flags | IO_BUFLOCKED, cred, &newb)) != 0) {
                        brelse(bp);
+                       if (++reclaimed == 1) {
+                               UFS_LOCK(ump);
+                               softdep_request_cleanup(fs, vp, cred,
+                                   FLUSH_BLOCKS_WAIT);
+                               UFS_UNLOCK(ump);
+                               goto retry;
+                       }
                        goto fail;
                }
                nb = newb;
@@ -860,10 +887,17 @@ ffs_balloc_ufs2(struct vnode *vp, off_t 
        if (nb == 0) {
                UFS_LOCK(ump);
                pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off, &bap[0]);
-               error = ffs_alloc(ip,
-                   lbn, pref, (int)fs->fs_bsize, flags, cred, &newb);
+               error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
+                   flags | IO_BUFLOCKED, cred, &newb);
                if (error) {
                        brelse(bp);
+                       if (++reclaimed == 1) {
+                               UFS_LOCK(ump);
+                               softdep_request_cleanup(fs, vp, cred,
+                                   FLUSH_BLOCKS_WAIT);
+                               UFS_UNLOCK(ump);
+                               goto retry;
+                       }
                        goto fail;
                }
                nb = newb;

Modified: head/sys/ufs/ffs/ffs_extern.h
==============================================================================
--- head/sys/ufs/ffs/ffs_extern.h       Fri Jun 10 22:42:00 2011        (r222957)
+++ head/sys/ufs/ffs/ffs_extern.h       Fri Jun 10 22:48:35 2011        (r222958)
@@ -74,6 +74,7 @@ int   ffs_isfreeblock(struct fs *, u_char 
 void   ffs_load_inode(struct buf *, struct inode *, struct fs *, ino_t);
 int    ffs_mountroot(void);
 void   ffs_oldfscompat_write(struct fs *, struct ufsmount *);
+void   ffs_pages_remove(struct vnode *vp, vm_pindex_t start, vm_pindex_t end);
 int    ffs_reallocblks(struct vop_reallocblks_args *);
 int    ffs_realloccg(struct inode *, ufs2_daddr_t, ufs2_daddr_t,
            ufs2_daddr_t, int, int, int, struct ucred *, struct buf **);
@@ -107,7 +108,6 @@ extern struct vop_vector ffs_fifoops2;
 
 int    softdep_check_suspend(struct mount *, struct vnode *,
          int, int, int, int);
-int    softdep_complete_trunc(struct vnode *, void *);
 void   softdep_get_depcounts(struct mount *, int *, int *);
 void   softdep_initialize(void);
 void   softdep_uninitialize(void);
@@ -139,14 +139,17 @@ void      softdep_setup_blkfree(struct mount 
 void   softdep_setup_inofree(struct mount *, struct buf *, ino_t,
            struct workhead *);
 void   softdep_setup_sbupdate(struct ufsmount *, struct fs *, struct buf *);
-void   *softdep_setup_trunc(struct vnode *vp, off_t length, int flags);
 void   softdep_fsync_mountdev(struct vnode *);
 int    softdep_sync_metadata(struct vnode *);
+int    softdep_sync_buf(struct vnode *, struct buf *, int);
 int     softdep_process_worklist(struct mount *, int);
 int     softdep_fsync(struct vnode *);
 int    softdep_waitidle(struct mount *);
 int    softdep_prealloc(struct vnode *, int);
 int    softdep_journal_lookup(struct mount *, struct vnode **);
+void   softdep_journal_freeblocks(struct inode *, struct ucred *, off_t, int);
+void   softdep_journal_fsync(struct inode *);
+
 
 /*
  * Things to request flushing in softdep_request_cleanup()

Modified: head/sys/ufs/ffs/ffs_inode.c
==============================================================================
--- head/sys/ufs/ffs/ffs_inode.c        Fri Jun 10 22:42:00 2011        (r222957)
+++ head/sys/ufs/ffs/ffs_inode.c        Fri Jun 10 22:48:35 2011        (r222958)
@@ -120,7 +120,7 @@ ffs_update(vp, waitfor)
        }
 }
 
-static void
+void
 ffs_pages_remove(struct vnode *vp, vm_pindex_t start, vm_pindex_t end)
 {
        vm_object_t object;
@@ -151,12 +151,12 @@ ffs_truncate(vp, length, flags, cred, td
        ufs2_daddr_t bn, lbn, lastblock, lastiblock[NIADDR], indir_lbn[NIADDR];
        ufs2_daddr_t oldblks[NDADDR + NIADDR], newblks[NDADDR + NIADDR];
        ufs2_daddr_t count, blocksreleased = 0, datablocks;
-       void *cookie;
        struct bufobj *bo;
        struct fs *fs;
        struct buf *bp;
        struct ufsmount *ump;
-       int needextclean, softdepslowdown, extblocks;
+       int softdeptrunc, journaltrunc;
+       int needextclean, extblocks;
        int offset, size, level, nblocks;
        int i, error, allerror;
        off_t osize;
@@ -165,7 +165,6 @@ ffs_truncate(vp, length, flags, cred, td
        fs = ip->i_fs;
        ump = ip->i_ump;
        bo = &vp->v_bufobj;
-       cookie = NULL;
 
        ASSERT_VOP_LOCKED(vp, "ffs_truncate");
 
@@ -173,6 +172,11 @@ ffs_truncate(vp, length, flags, cred, td
                return (EINVAL);
        if (length > fs->fs_maxfilesize)
                return (EFBIG);
+#ifdef QUOTA
+       error = getinoquota(ip);
+       if (error)
+               return (error);
+#endif
        /*
         * Historically clients did not have to specify which data
         * they were truncating. So, if not specified, we assume
@@ -191,7 +195,10 @@ ffs_truncate(vp, length, flags, cred, td
         */
        allerror = 0;
        needextclean = 0;
-       softdepslowdown = DOINGSOFTDEP(vp) && softdep_slowdown(vp);
+       softdeptrunc = 0;
+       journaltrunc = DOINGSUJ(vp);
+       if (journaltrunc == 0 && DOINGSOFTDEP(vp) && length == 0)
+               softdeptrunc = !softdep_slowdown(vp);
        extblocks = 0;
        datablocks = DIP(ip, i_blocks);
        if (fs->fs_magic == FS_UFS2_MAGIC && ip->i_din2->di_extsize > 0) {
@@ -199,27 +206,23 @@ ffs_truncate(vp, length, flags, cred, td
                datablocks -= extblocks;
        }
        if ((flags & IO_EXT) && extblocks > 0) {
-               if (DOINGSOFTDEP(vp) && softdepslowdown == 0 && length == 0) {
-                       if ((flags & IO_NORMAL) == 0) {
-                               softdep_setup_freeblocks(ip, length, IO_EXT);
-                               return (0);
-                       }
+               if (length != 0)
+                       panic("ffs_truncate: partial trunc of extdata");
+               if (softdeptrunc || journaltrunc) {
+                       if ((flags & IO_NORMAL) == 0)
+                               goto extclean;
                        needextclean = 1;
                } else {
-                       if (length != 0)
-                               panic("ffs_truncate: partial trunc of extdata");
                        if ((error = ffs_syncvnode(vp, MNT_WAIT)) != 0)
                                return (error);
-                       if (DOINGSUJ(vp))
-                               cookie = softdep_setup_trunc(vp, length, flags);
-                       osize = ip->i_din2->di_extsize;
-                       ip->i_din2->di_blocks -= extblocks;
 #ifdef QUOTA
                        (void) chkdq(ip, -extblocks, NOCRED, 0);
 #endif
                        vinvalbuf(vp, V_ALT, 0, 0);
                        ffs_pages_remove(vp,
                            OFF_TO_IDX(lblktosize(fs, -extblocks)), 0);
+                       osize = ip->i_din2->di_extsize;
+                       ip->i_din2->di_blocks -= extblocks;
                        ip->i_din2->di_extsize = 0;
                        for (i = 0; i < NXADDR; i++) {
                                oldblks[i] = ip->i_din2->di_extb[i];
@@ -227,7 +230,7 @@ ffs_truncate(vp, length, flags, cred, td
                        }
                        ip->i_flag |= IN_CHANGE;
                        if ((error = ffs_update(vp, 1)))
-                               goto out;
+                               return (error);
                        for (i = 0; i < NXADDR; i++) {
                                if (oldblks[i] == 0)
                                        continue;
@@ -236,10 +239,8 @@ ffs_truncate(vp, length, flags, cred, td
                        }
                }
        }
-       if ((flags & IO_NORMAL) == 0) {
-               error = 0;
-               goto out;
-       }
+       if ((flags & IO_NORMAL) == 0)
+               return (0);
        if (vp->v_type == VLNK &&
            (ip->i_size < vp->v_mount->mnt_maxsymlinklen ||
             datablocks == 0)) {
@@ -252,24 +253,17 @@ ffs_truncate(vp, length, flags, cred, td
                DIP_SET(ip, i_size, 0);
                ip->i_flag |= IN_CHANGE | IN_UPDATE;
                if (needextclean)
-                       softdep_setup_freeblocks(ip, length, IO_EXT);
-               error = ffs_update(vp, 1);
-               goto out;
+                       goto extclean;
+               return ffs_update(vp, 1);
        }
        if (ip->i_size == length) {
                ip->i_flag |= IN_CHANGE | IN_UPDATE;
                if (needextclean)
-                       softdep_setup_freeblocks(ip, length, IO_EXT);
-               error = ffs_update(vp, 0);
-               goto out;
+                       goto extclean;
+               return ffs_update(vp, 0);
        }
        if (fs->fs_ronly)
                panic("ffs_truncate: read-only filesystem");
-#ifdef QUOTA
-       error = getinoquota(ip);
-       if (error)
-               goto out;
-#endif
        if ((ip->i_flags & SF_SNAPSHOT) != 0)
                ffs_snapremove(vp);
        vp->v_lasta = vp->v_clen = vp->v_cstart = vp->v_lastw = 0;
@@ -285,7 +279,7 @@ ffs_truncate(vp, length, flags, cred, td
                error = UFS_BALLOC(vp, length - 1, 1, cred, flags, &bp);
                if (error) {
                        vnode_pager_setsize(vp, osize);
-                       goto out;
+                       return (error);
                }
                ip->i_size = length;
                DIP_SET(ip, i_size, length);
@@ -296,11 +290,10 @@ ffs_truncate(vp, length, flags, cred, td
                else
                        bawrite(bp);
                ip->i_flag |= IN_CHANGE | IN_UPDATE;
-               error = ffs_update(vp, 1);
-               goto out;
+               return ffs_update(vp, 1);
        }
        if (DOINGSOFTDEP(vp)) {
-               if (length > 0 || softdepslowdown) {
+               if (softdeptrunc == 0 && journaltrunc == 0) {
                        /*
                         * If a file is only partially truncated, then
                         * we have to clean up the data structures
@@ -311,29 +304,20 @@ ffs_truncate(vp, length, flags, cred, td
                         * so that it will have no data structures left.
                         */
                        if ((error = ffs_syncvnode(vp, MNT_WAIT)) != 0)
-                               goto out;
-                       /*
-                        * We have to journal the truncation before we change
-                        * any blocks so we don't leave the file partially
-                        * truncated.
-                        */
-                       if (DOINGSUJ(vp) && cookie == NULL)
-                               cookie = softdep_setup_trunc(vp, length, flags);
+                               return (error);
                } else {
-#ifdef QUOTA
-                       (void) chkdq(ip, -datablocks, NOCRED, 0);
-#endif
-                       softdep_setup_freeblocks(ip, length, needextclean ?
-                           IO_EXT | IO_NORMAL : IO_NORMAL);
+                       flags = IO_NORMAL | (needextclean ? IO_EXT: 0);
+                       if (journaltrunc)
+                               softdep_journal_freeblocks(ip, cred, length,
+                                   flags);
+                       else
+                               softdep_setup_freeblocks(ip, length, flags);
                        ASSERT_VOP_LOCKED(vp, "ffs_truncate1");
-                       vinvalbuf(vp, needextclean ? 0 : V_NORMAL, 0, 0);
-                       if (!needextclean)
-                               ffs_pages_remove(vp, 0,
-                                   OFF_TO_IDX(lblktosize(fs, -extblocks)));
-                       vnode_pager_setsize(vp, 0);
-                       ip->i_flag |= IN_CHANGE | IN_UPDATE;
-                       error = ffs_update(vp, 0);
-                       goto out;
+                       if (journaltrunc == 0) {
+                               ip->i_flag |= IN_CHANGE | IN_UPDATE;
+                               error = ffs_update(vp, 0);
+                       }
+                       return (error);
                }
        }
        /*
@@ -353,7 +337,7 @@ ffs_truncate(vp, length, flags, cred, td
                flags |= BA_CLRBUF;
                error = UFS_BALLOC(vp, length - 1, 1, cred, flags, &bp);
                if (error)
-                       goto out;
+                       return (error);
                /*
                 * When we are doing soft updates and the UFS_BALLOC
                 * above fills in a direct block hole with a full sized
@@ -365,7 +349,7 @@ ffs_truncate(vp, length, flags, cred, td
                if (DOINGSOFTDEP(vp) && lbn < NDADDR &&
                    fragroundup(fs, blkoff(fs, length)) < fs->fs_bsize &&
                    (error = ffs_syncvnode(vp, MNT_WAIT)) != 0)
-                       goto out;
+                       return (error);
                ip->i_size = length;
                DIP_SET(ip, i_size, length);
                size = blksize(fs, ip, lbn);
@@ -411,13 +395,7 @@ ffs_truncate(vp, length, flags, cred, td
                        DIP_SET(ip, i_db[i], 0);
        }
        ip->i_flag |= IN_CHANGE | IN_UPDATE;
-       /*
-        * When doing softupdate journaling we must preserve the size along
-        * with the old pointers until they are freed or we might not
-        * know how many fragments remain.
-        */
-       if (!DOINGSUJ(vp))
-               allerror = ffs_update(vp, 1);
+       allerror = ffs_update(vp, 1);
        
        /*
         * Having written the new inode to disk, save its new configuration
@@ -541,14 +519,14 @@ done:
 #ifdef QUOTA
        (void) chkdq(ip, -blocksreleased, NOCRED, 0);
 #endif
-       error = allerror;
-out:
-       if (cookie) {
-               allerror = softdep_complete_trunc(vp, cookie);
-               if (allerror != 0 && error == 0)
-                       error = allerror;
-       }
-       return (error);
+       return (allerror);
+
+extclean:
+       if (journaltrunc)
+               softdep_journal_freeblocks(ip, cred, length, IO_EXT);
+       else
+               softdep_setup_freeblocks(ip, length, IO_EXT);
+       return ffs_update(vp, MNT_WAIT);
 }
 
 /*

Modified: head/sys/ufs/ffs/ffs_softdep.c
==============================================================================
--- head/sys/ufs/ffs/ffs_softdep.c      Fri Jun 10 22:42:00 2011        (r222957)
+++ head/sys/ufs/ffs/ffs_softdep.c      Fri Jun 10 22:48:35 2011        (r222958)
@@ -59,6 +59,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/buf.h>
 #include <sys/kdb.h>
 #include <sys/kthread.h>
+#include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mount.h>
@@ -71,6 +72,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/syslog.h>
 #include <sys/vnode.h>
 #include <sys/conf.h>
+
 #include <ufs/ufs/dir.h>
 #include <ufs/ufs/extattr.h>
 #include <ufs/ufs/quota.h>
@@ -82,6 +84,8 @@ __FBSDID("$FreeBSD$");
 #include <ufs/ufs/ufs_extern.h>
 
 #include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_object.h>
 
 #include <ddb/ddb.h>
 
@@ -214,6 +218,25 @@ softdep_setup_allocindir_meta(nbp, ip, b
 }
 
 void
+softdep_journal_freeblocks(ip, cred, length, flags)
+       struct inode *ip;
+       struct ucred *cred;
+       off_t length;
+       int flags;
+{
+       
+       panic("softdep_journal_freeblocks called");
+}
+
+void
+softdep_journal_fsync(ip)
+       struct inode *ip;
+{
+
+       panic("softdep_journal_fsync called");
+}
+
+void
 softdep_setup_freeblocks(ip, length, flags)
        struct inode *ip;
        off_t length;
@@ -282,29 +305,6 @@ softdep_setup_directory_change(bp, dp, i
        panic("softdep_setup_directory_change called");
 }
 
-void *
-softdep_setup_trunc(vp, length, flags)
-       struct vnode *vp;
-       off_t length;
-       int flags;
-{
-
-       panic("%s called", __FUNCTION__);
-
-       return (NULL);
-}
-
-int
-softdep_complete_trunc(vp, cookie)
-       struct vnode *vp;
-       void *cookie;
-{
-
-       panic("%s called", __FUNCTION__);
-
-       return (0);
-}
-
 void
 softdep_setup_blkfree(mp, bp, blkno, frags, wkhd)
        struct mount *mp;
@@ -499,6 +499,13 @@ softdep_sync_metadata(struct vnode *vp)
 }
 
 int
+softdep_sync_buf(struct vnode *vp, struct buf *bp, int waitfor)
+{
+
+       return (0);
+}
+
+int
 softdep_slowdown(vp)
        struct vnode *vp;
 {
@@ -614,10 +621,13 @@ FEATURE(softupdates, "FFS soft-updates s
 #define        D_JSEGDEP       23
 #define        D_SBDEP         24
 #define        D_JTRUNC        25
-#define        D_LAST          D_JTRUNC
+#define        D_JFSYNC        26
+#define        D_SENTINAL      27
+#define        D_LAST          D_SENTINAL
 
 unsigned long dep_current[D_LAST + 1];
 unsigned long dep_total[D_LAST + 1];
+unsigned long dep_write[D_LAST + 1];
 
 
 SYSCTL_NODE(_debug, OID_AUTO, softdep, CTLFLAG_RW, 0, "soft updates stats");
@@ -625,13 +635,17 @@ SYSCTL_NODE(_debug_softdep, OID_AUTO, to
     "total dependencies allocated");
 SYSCTL_NODE(_debug_softdep, OID_AUTO, current, CTLFLAG_RW, 0,
     "current dependencies allocated");
+SYSCTL_NODE(_debug_softdep, OID_AUTO, write, CTLFLAG_RW, 0,
+    "current dependencies written");
 
 #define        SOFTDEP_TYPE(type, str, long)                                   \
     static MALLOC_DEFINE(M_ ## type, #str, long);                      \
     SYSCTL_ULONG(_debug_softdep_total, OID_AUTO, str, CTLFLAG_RD,      \
        &dep_total[D_ ## type], 0, "");                                 \
     SYSCTL_ULONG(_debug_softdep_current, OID_AUTO, str, CTLFLAG_RD,    \
-       &dep_current[D_ ## type], 0, "");
+       &dep_current[D_ ## type], 0, "");                               \
+    SYSCTL_ULONG(_debug_softdep_write, OID_AUTO, str, CTLFLAG_RD,      \
+       &dep_write[D_ ## type], 0, "");
 
 SOFTDEP_TYPE(PAGEDEP, pagedep, "File page dependencies"); 
 SOFTDEP_TYPE(INODEDEP, inodedep, "Inode dependencies");
@@ -660,6 +674,7 @@ SOFTDEP_TYPE(JSEG, jseg, "Journal segmen
 SOFTDEP_TYPE(JSEGDEP, jsegdep, "Journal segment complete");
 SOFTDEP_TYPE(SBDEP, sbdep, "Superblock write dependency");
 SOFTDEP_TYPE(JTRUNC, jtrunc, "Journal inode truncation");
+SOFTDEP_TYPE(JFSYNC, jfsync, "Journal fsync complete");
 
 static MALLOC_DEFINE(M_SAVEDINO, "savedino", "Saved inodes");
 static MALLOC_DEFINE(M_JBLOCKS, "jblocks", "Journal block locations");
@@ -694,7 +709,8 @@ static struct malloc_type *memtype[] = {
        M_JSEG,
        M_JSEGDEP,
        M_SBDEP,
-       M_JTRUNC
+       M_JTRUNC,
+       M_JFSYNC
 };
 
 static LIST_HEAD(mkdirlist, mkdir) mkdirlisthd;
@@ -734,10 +750,11 @@ static    void clear_unlinked_inodedep(stru
 static struct inodedep *first_unlinked_inodedep(struct ufsmount *);
 static int flush_pagedep_deps(struct vnode *, struct mount *,
            struct diraddhd *);
-static void free_pagedep(struct pagedep *);
+static int free_pagedep(struct pagedep *);
 static int flush_newblk_dep(struct vnode *, struct mount *, ufs_lbn_t);
-static int flush_inodedep_deps(struct mount *, ino_t);
+static int flush_inodedep_deps(struct vnode *, struct mount *, ino_t);
 static int flush_deplist(struct allocdirectlst *, int, int *);
+static int sync_cgs(struct mount *, int);
 static int handle_written_filepage(struct pagedep *, struct buf *);
 static int handle_written_sbdep(struct sbdep *, struct buf *);
 static void initiate_write_sbdep(struct sbdep *);
@@ -750,7 +767,7 @@ static      void handle_written_jaddref(struc
 static void handle_written_jremref(struct jremref *);
 static void handle_written_jseg(struct jseg *, struct buf *);
 static void handle_written_jnewblk(struct jnewblk *);
-static void handle_written_jfreeblk(struct jfreeblk *);
+static void handle_written_jblkdep(struct jblkdep *);
 static void handle_written_jfreefrag(struct jfreefrag *);
 static void complete_jseg(struct jseg *);
 static void jseg_write(struct ufsmount *ump, struct jseg *, uint8_t *);
@@ -758,6 +775,7 @@ static      void jaddref_write(struct jaddref
 static void jremref_write(struct jremref *, struct jseg *, uint8_t *);
 static void jmvref_write(struct jmvref *, struct jseg *, uint8_t *);
 static void jtrunc_write(struct jtrunc *, struct jseg *, uint8_t *);
+static void jfsync_write(struct jfsync *, struct jseg *, uint8_t *data);
 static void jnewblk_write(struct jnewblk *, struct jseg *, uint8_t *);
 static void jfreeblk_write(struct jfreeblk *, struct jseg *, uint8_t *);
 static void jfreefrag_write(struct jfreefrag *, struct jseg *, uint8_t *);
@@ -768,7 +786,9 @@ static      void handle_allocdirect_partdone(
 static struct jnewblk *cancel_newblk(struct newblk *, struct worklist *,
            struct workhead *);
 static void indirdep_complete(struct indirdep *);
-static int indirblk_inseg(struct mount *, ufs2_daddr_t);
+static int indirblk_lookup(struct mount *, ufs2_daddr_t);
+static void indirblk_insert(struct freework *);
+static void indirblk_remove(struct freework *);
 static void handle_allocindir_partdone(struct allocindir *);
 static void initiate_write_filepage(struct pagedep *, struct buf *);
 static void initiate_write_indirdep(struct indirdep*, struct buf *);
@@ -777,10 +797,12 @@ static    void initiate_write_bmsafemap(str
 static void initiate_write_inodeblock_ufs1(struct inodedep *, struct buf *);
 static void initiate_write_inodeblock_ufs2(struct inodedep *, struct buf *);
 static void handle_workitem_freefile(struct freefile *);
-static void handle_workitem_remove(struct dirrem *, struct vnode *);
+static int handle_workitem_remove(struct dirrem *, int);
 static struct dirrem *newdirrem(struct buf *, struct inode *,
            struct inode *, int, struct dirrem **);
-static void cancel_indirdep(struct indirdep *, struct buf *, struct inodedep *,
+static struct indirdep *indirdep_lookup(struct mount *, struct inode *,
+           struct buf *);
+static void cancel_indirdep(struct indirdep *, struct buf *,
            struct freeblks *);
 static void free_indirdep(struct indirdep *);
 static void free_diradd(struct diradd *, struct workhead *);
@@ -795,8 +817,13 @@ static     void cancel_diradd(struct diradd 
            struct jremref *, struct jremref *);
 static void dirrem_journal(struct dirrem *, struct jremref *, struct jremref *,
            struct jremref *);
-static void cancel_allocindir(struct allocindir *, struct inodedep *,
-           struct freeblks *);
+static void cancel_allocindir(struct allocindir *, struct buf *bp,
+           struct freeblks *, int);
+static int setup_trunc_indir(struct freeblks *, struct inode *,
+           ufs_lbn_t, ufs_lbn_t, ufs2_daddr_t);
+static void complete_trunc_indir(struct freework *);
+static void trunc_indirdep(struct indirdep *, struct freeblks *, struct buf *,
+           int);
 static void complete_mkdir(struct mkdir *);
 static void free_newdirblk(struct newdirblk *);
 static void free_jremref(struct jremref *);
@@ -806,7 +833,7 @@ static      void free_jsegs(struct jblocks *)
 static void rele_jseg(struct jseg *);
 static void free_jseg(struct jseg *, struct jblocks *);
 static void free_jnewblk(struct jnewblk *);
-static void free_jfreeblk(struct jfreeblk *);
+static void free_jblkdep(struct jblkdep *);
 static void free_jfreefrag(struct jfreefrag *);
 static void free_freedep(struct freedep *);
 static void journal_jremref(struct dirrem *, struct jremref *,
@@ -818,30 +845,33 @@ static    void cancel_jfreefrag(struct jfre
 static inline void setup_freedirect(struct freeblks *, struct inode *,
            int, int);
 static inline void setup_freeext(struct freeblks *, struct inode *, int, int);
-static inline void setup_freeindir(struct freeblks *, struct inode *, int i,
+static inline void setup_freeindir(struct freeblks *, struct inode *, int,
            ufs_lbn_t, int);
 static inline struct freeblks *newfreeblks(struct mount *, struct inode *);
 static void indir_trunc(struct freework *, ufs2_daddr_t, ufs_lbn_t);
-static void softdep_trunc_deps(struct vnode *, struct freeblks *, ufs_lbn_t,
+ufs2_daddr_t blkcount(struct fs *, ufs2_daddr_t, off_t);
+static int trunc_check_buf(struct buf *, int *, ufs_lbn_t, int, int);
+static void trunc_dependencies(struct inode *, struct freeblks *, ufs_lbn_t,
            int, int);
-static         int cancel_pagedep(struct pagedep *, struct inodedep *,
-           struct freeblks *);
-static int deallocate_dependencies(struct buf *, struct inodedep *,
-           struct freeblks *, int off);
+static void trunc_pages(struct inode *, off_t, ufs2_daddr_t, int);
+static         int cancel_pagedep(struct pagedep *, struct freeblks *, int);
+static int deallocate_dependencies(struct buf *, struct freeblks *, int);
+static void newblk_freefrag(struct newblk*);
 static void free_newblk(struct newblk *);
 static void cancel_allocdirect(struct allocdirectlst *,
-           struct allocdirect *, struct freeblks *, int);
+           struct allocdirect *, struct freeblks *);
 static int check_inode_unwritten(struct inodedep *);
 static int free_inodedep(struct inodedep *);
 static void freework_freeblock(struct freework *);
-static void handle_workitem_freeblocks(struct freeblks *, int);
-static void handle_complete_freeblocks(struct freeblks *);
+static void freework_enqueue(struct freework *);
+static int handle_workitem_freeblocks(struct freeblks *, int);
+static int handle_complete_freeblocks(struct freeblks *, int);
 static void handle_workitem_indirblk(struct freework *);
-static void handle_written_freework(struct freework *);
+static void handle_written_freework(struct freework *, int);
 static void merge_inode_lists(struct allocdirectlst *,struct allocdirectlst *);
 static struct worklist *jnewblk_merge(struct worklist *, struct worklist *,
            struct workhead *);
-static void setup_allocindir_phase2(struct buf *, struct inode *,
+static struct freefrag *setup_allocindir_phase2(struct buf *, struct inode *,
            struct inodedep *, struct allocindir *, ufs_lbn_t);
 static struct allocindir *newallocindir(struct inode *, int, ufs2_daddr_t,
            ufs2_daddr_t, ufs_lbn_t);
@@ -862,16 +892,20 @@ static    int newblk_lookup(struct mount *,
 static int inodedep_find(struct inodedep_hashhead *, struct fs *, ino_t,
            struct inodedep **);
 static int inodedep_lookup(struct mount *, ino_t, int, struct inodedep **);
-static int pagedep_lookup(struct mount *, ino_t, ufs_lbn_t, int,
-           struct pagedep **);
+static int pagedep_lookup(struct mount *, struct buf *bp, ino_t, ufs_lbn_t,
+           int, struct pagedep **);
 static int pagedep_find(struct pagedep_hashhead *, ino_t, ufs_lbn_t,
            struct mount *mp, int, struct pagedep **);
 static void pause_timer(void *);
 static int request_cleanup(struct mount *, int);
-static int process_worklist_item(struct mount *, int);
+static int process_worklist_item(struct mount *, int, int);
 static void process_removes(struct vnode *);
+static void process_truncates(struct vnode *);
 static void jwork_move(struct workhead *, struct workhead *);
+static void jwork_insert(struct workhead *, struct jsegdep *);
 static void add_to_worklist(struct worklist *, int);
+static void wake_worklist(struct worklist *);
+static void wait_worklist(struct worklist *, char *);
 static void remove_from_worklist(struct worklist *);
 static void softdep_flush(void);
 static int softdep_speedup(void);
@@ -889,17 +923,20 @@ static    struct jremref *newjremref(struct
            struct inode *ip, off_t, nlink_t);
 static struct jaddref *newjaddref(struct inode *, ino_t, off_t, int16_t,
            uint16_t);
-static inline void newinoref(struct inoref *, ino_t, ino_t, off_t, nlink_t,
+static inline void newinoref(struct inoref *, ino_t, ino_t, off_t, nlink_t,
            uint16_t);
-static inline struct jsegdep *inoref_jseg(struct inoref *);
+static inline struct jsegdep *inoref_jseg(struct inoref *);
 static struct jmvref *newjmvref(struct inode *, ino_t, off_t, off_t);
 static struct jfreeblk *newjfreeblk(struct freeblks *, ufs_lbn_t,
            ufs2_daddr_t, int);
+static struct jtrunc *newjtrunc(struct freeblks *, off_t, int);
+static void move_newblock_dep(struct jaddref *, struct inodedep *);
+static void cancel_jfreeblk(struct freeblks *, ufs2_daddr_t);
 static struct jfreefrag *newjfreefrag(struct freefrag *, struct inode *,
            ufs2_daddr_t, long, ufs_lbn_t);
 static struct freework *newfreework(struct ufsmount *, struct freeblks *,
-           struct freework *, ufs_lbn_t, ufs2_daddr_t, int, int);
-static void jwait(struct worklist *wk);
+           struct freework *, ufs_lbn_t, ufs2_daddr_t, int, int, int);
+static int jwait(struct worklist *, int);
 static struct inodedep *inodedep_lookup_ip(struct inode *);
 static int bmsafemap_rollbacks(struct bmsafemap *);
 static struct freefile *handle_bufwait(struct inodedep *, struct workhead *);
@@ -1064,6 +1101,30 @@ jwork_move(dst, src)
        }
 }
 
+static void     /* Leave dst with the lower-js_seq jsegdep; free the other. */
+jwork_insert(dst, jsegdep)
+       struct workhead *dst;           /* work list to update */
+       struct jsegdep *jsegdep;        /* candidate; inserted or freed here */
+{
+       struct jsegdep *jsegdepn;       /* jsegdep already found on dst */
+       struct worklist *wk;
+
+       LIST_FOREACH(wk, dst, wk_list)  /* stop at first D_JSEGDEP on dst */
+               if (wk->wk_type == D_JSEGDEP)
+                       break;
+       if (wk == NULL) {               /* none found: just insert ours */
+               WORKLIST_INSERT(dst, &jsegdep->jd_list);
+               return;
+       }
+       jsegdepn = WK_JSEGDEP(wk);
+       if (jsegdep->jd_seg->js_seq < jsegdepn->jd_seg->js_seq) {
+               WORKLIST_REMOVE(wk);    /* ours has the lower js_seq: swap */
+               free_jsegdep(jsegdepn);
+               WORKLIST_INSERT(dst, &jsegdep->jd_list);
+       } else
+               free_jsegdep(jsegdep);  /* existing one stays (ties too) */
+}
+
 /*
  * Routines for tracking and managing workitems.
  */
@@ -1088,6 +1149,8 @@ workitem_free(item, type)
                panic("workitem_free: type mismatch %s != %s",
                    TYPENAME(item->wk_type), TYPENAME(type));
 #endif
+       if (item->wk_state & IOWAITING)
+               wakeup(item);
        ump = VFSTOUFS(item->wk_mp);
        if (--ump->softdep_deps == 0 && ump->softdep_req)
                wakeup(&ump->softdep_deps);
@@ -1101,14 +1164,18 @@ workitem_alloc(item, type, mp)
        int type;
        struct mount *mp;
 {
+       struct ufsmount *ump;
+
        item->wk_type = type;
        item->wk_mp = mp;
        item->wk_state = 0;
+
+       ump = VFSTOUFS(mp);
        ACQUIRE_LOCK(&lk);
        dep_current[type]++;
        dep_total[type]++;
-       VFSTOUFS(mp)->softdep_deps++;
-       VFSTOUFS(mp)->softdep_accdeps++;
+       ump->softdep_deps++;
+       ump->softdep_accdeps++;
        FREE_LOCK(&lk);
 }
 
@@ -1270,8 +1337,7 @@ softdep_flush(void)
                        vfslocked = VFS_LOCK_GIANT(mp);
                        progress += softdep_process_worklist(mp, 0);
                        ump = VFSTOUFS(mp);
-                       remaining += ump->softdep_on_worklist -
-                               ump->softdep_on_worklist_inprogress;
+                       remaining += ump->softdep_on_worklist;
                        VFS_UNLOCK_GIANT(vfslocked);
                        mtx_lock(&mountlist_mtx);
                        nmp = TAILQ_NEXT(mp, mnt_list);
@@ -1314,10 +1380,14 @@ softdep_speedup(void)
  * The following routine is the only one that removes items
  * and does so in order from first to last.
  */
+
+#define        WK_HEAD         0x0001  /* Add to HEAD. */
+#define        WK_NODELAY      0x0002  /* Process immediately. */
+
 static void
-add_to_worklist(wk, nodelay)
+add_to_worklist(wk, flags)
        struct worklist *wk;
-       int nodelay;
+       int flags;
 {
        struct ufsmount *ump;
 
@@ -1327,13 +1397,17 @@ add_to_worklist(wk, nodelay)
                panic("add_to_worklist: %s(0x%X) already on list",
                    TYPENAME(wk->wk_type), wk->wk_state);
        wk->wk_state |= ONWORKLIST;
-       if (LIST_EMPTY(&ump->softdep_workitem_pending))
+       if (ump->softdep_on_worklist == 0) {
                LIST_INSERT_HEAD(&ump->softdep_workitem_pending, wk, wk_list);
-       else
+               ump->softdep_worklist_tail = wk;
+       } else if (flags & WK_HEAD) {
+               LIST_INSERT_HEAD(&ump->softdep_workitem_pending, wk, wk_list);
+       } else {
                LIST_INSERT_AFTER(ump->softdep_worklist_tail, wk, wk_list);

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
_______________________________________________
svn-src-all@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to