svn commit: r222958 - in head: sbin/fsck_ffs sys/sys sys/ufs/ffs sys/ufs/ufs
Author: jeff Date: Fri Jun 10 22:48:35 2011 New Revision: 222958 URL: http://svn.freebsd.org/changeset/base/222958 Log: Implement fully asynchronous partial truncation with softupdates journaling to resolve errors which can cause corruption on recovery with the old synchronous mechanism. - Append partial truncation freework structures to indirdeps while truncation is proceeding. These prevent new block pointers from becoming valid until truncation completes and serialize truncations. - On completion of a partial truncate, journal work waits for zeroed pointers to hit indirects. - softdep_journal_freeblocks() handles last frag allocation and last block zeroing. - vtruncbuf/ffs_pages_remove moved into softdep_*_freeblocks() so it is only implemented in one place. - Block allocation failure handling moved up one level so it does not proceed with buf locks held. This permits us to do more extensive reclaims when filesystem space is exhausted. - softdep_sync_metadata() is broken into two parts: the first executes once at the start of ffs_syncvnode() and flushes truncations and inode dependencies. The second is called on each locked buf. This eliminates excessive looping and rollbacks. - Improve the mechanism in process_worklist_item() that handles acquiring vnode locks for handle_workitem_remove() so that it works more generally and does not loop excessively over the same worklist items on each call. - Don't corrupt directories by zeroing the tail in fsck. This is only done for regular files. - Push a fsync complete record for files that need it so the checker knows a truncation in the journal is no longer valid.
Discussed with: mckusick, kib (ffs_pages_remove and ffs_truncate parts) Tested by: pho Modified: head/sbin/fsck_ffs/suj.c head/sys/sys/vnode.h head/sys/ufs/ffs/ffs_alloc.c head/sys/ufs/ffs/ffs_balloc.c head/sys/ufs/ffs/ffs_extern.h head/sys/ufs/ffs/ffs_inode.c head/sys/ufs/ffs/ffs_softdep.c head/sys/ufs/ffs/ffs_vfsops.c head/sys/ufs/ffs/ffs_vnops.c head/sys/ufs/ffs/fs.h head/sys/ufs/ffs/softdep.h head/sys/ufs/ufs/inode.h head/sys/ufs/ufs/ufsmount.h Modified: head/sbin/fsck_ffs/suj.c == --- head/sbin/fsck_ffs/suj.c Fri Jun 10 22:42:00 2011 (r222957) +++ head/sbin/fsck_ffs/suj.c Fri Jun 10 22:48:35 2011 (r222958) @@ -1604,7 +1604,7 @@ ino_trunc(ino_t ino, off_t size) * uninitialized space later. */ off = blkoff(fs, size); - if (off) { + if (off && DIP(ip, di_mode) != IFDIR) { uint8_t *buf; long clrsize; @@ -1775,13 +1775,18 @@ cg_trunc(struct suj_cg *sc) struct suj_ino *sino; int i; - for (i = 0; i < SUJ_HASHSIZE; i++) - LIST_FOREACH(sino, &sc->sc_inohash[i], si_next) + for (i = 0; i < SUJ_HASHSIZE; i++) { + LIST_FOREACH(sino, &sc->sc_inohash[i], si_next) { if (sino->si_trunc) { ino_trunc(sino->si_ino, sino->si_trunc->jt_size); + sino->si_blkadj = 0; sino->si_trunc = NULL; } + if (sino->si_blkadj) + ino_adjblks(sino); + } + } } /* @@ -1791,7 +1796,6 @@ cg_trunc(struct suj_cg *sc) static void cg_check_blk(struct suj_cg *sc) { - struct suj_ino *sino; struct suj_blk *sblk; int i; @@ -1799,15 +1803,6 @@ cg_check_blk(struct suj_cg *sc) for (i = 0; i < SUJ_HASHSIZE; i++) LIST_FOREACH(sblk, &sc->sc_blkhash[i], sb_next) blk_check(sblk); - /* -* Now that we've freed blocks which are not referenced we -* make a second pass over all inodes to adjust their block -* counts.
-*/ - for (i = 0; i < SUJ_HASHSIZE; i++) - LIST_FOREACH(sino, &sc->sc_inohash[i], si_next) - if (sino->si_blkadj) - ino_adjblks(sino); } /* @@ -1961,14 +1956,7 @@ ino_append(union jrec *rec) "parent %d, diroff %jd\n", refrec->jr_op, refrec->jr_ino, refrec->jr_nlink, refrec->jr_parent, refrec->jr_diroff); - /* -* Lookup the ino and clear truncate if one is found. Partial -* truncates are always done synchronously so if we discover -* an operation that requires a lock the truncation has completed -* and can be discarded. -*/ sino = ino_lookup(((struct jrefrec *)rec)->jr_ino, 1); - sino->si_trunc = NULL; sino->si_hasrecs = 1; srec = errmalloc(sizeof(*srec)); srec->sr_rec = rec; @@ -2174,9 +2162,7 @@ blk_build(struct jblkrec
Re: svn commit: r222958 - in head: sbin/fsck_ffs sys/sys sys/ufs/ffs sys/ufs/ufs
On Fri, 10 Jun 2011, Jeff Roberson wrote: Author: jeff Date: Fri Jun 10 22:48:35 2011 New Revision: 222958 URL: http://svn.freebsd.org/changeset/base/222958 Log: Implement fully asynchronous partial truncation with softupdates journaling to resolve errors which can cause corruption on recovery with the old synchronous mechanism. This diff is enormous and took months of work. I'm sorry to get it in so close to 9.0; I had no idea it would take so long. pho has tested multiple versions of the patch with and without journaling for days of test time and it has probably racked up a week of machine time for me, but there may be problems given that it is so huge. There is still a snapshot problem with SUJ that mckusick and I are working on. Expect to see some checkins for that soon. Thanks, Jeff - Append partial truncation freework structures to indirdeps while truncation is proceeding. These prevent new block pointers from becoming valid until truncation completes and serialize truncations. - On completion of a partial truncate, journal work waits for zeroed pointers to hit indirects. - softdep_journal_freeblocks() handles last frag allocation and last block zeroing. - vtruncbuf/ffs_pages_remove moved into softdep_*_freeblocks() so it is only implemented in one place. - Block allocation failure handling moved up one level so it does not proceed with buf locks held. This permits us to do more extensive reclaims when filesystem space is exhausted. - softdep_sync_metadata() is broken into two parts: the first executes once at the start of ffs_syncvnode() and flushes truncations and inode dependencies. The second is called on each locked buf. This eliminates excessive looping and rollbacks. - Improve the mechanism in process_worklist_item() that handles acquiring vnode locks for handle_workitem_remove() so that it works more generally and does not loop excessively over the same worklist items on each call. - Don't corrupt directories by zeroing the tail in fsck.
This is only done for regular files. - Push a fsync complete record for files that need it so the checker knows a truncation in the journal is no longer valid. Discussed with: mckusick, kib (ffs_pages_remove and ffs_truncate parts) Tested by: pho Modified: head/sbin/fsck_ffs/suj.c head/sys/sys/vnode.h head/sys/ufs/ffs/ffs_alloc.c head/sys/ufs/ffs/ffs_balloc.c head/sys/ufs/ffs/ffs_extern.h head/sys/ufs/ffs/ffs_inode.c head/sys/ufs/ffs/ffs_softdep.c head/sys/ufs/ffs/ffs_vfsops.c head/sys/ufs/ffs/ffs_vnops.c head/sys/ufs/ffs/fs.h head/sys/ufs/ffs/softdep.h head/sys/ufs/ufs/inode.h head/sys/ufs/ufs/ufsmount.h Modified: head/sbin/fsck_ffs/suj.c == --- head/sbin/fsck_ffs/suj.c Fri Jun 10 22:42:00 2011 (r222957) +++ head/sbin/fsck_ffs/suj.c Fri Jun 10 22:48:35 2011 (r222958) @@ -1604,7 +1604,7 @@ ino_trunc(ino_t ino, off_t size) * uninitialized space later. */ off = blkoff(fs, size); - if (off) { + if (off && DIP(ip, di_mode) != IFDIR) { uint8_t *buf; long clrsize; @@ -1775,13 +1775,18 @@ cg_trunc(struct suj_cg *sc) struct suj_ino *sino; int i; - for (i = 0; i < SUJ_HASHSIZE; i++) - LIST_FOREACH(sino, &sc->sc_inohash[i], si_next) + for (i = 0; i < SUJ_HASHSIZE; i++) { + LIST_FOREACH(sino, &sc->sc_inohash[i], si_next) { if (sino->si_trunc) { ino_trunc(sino->si_ino, sino->si_trunc->jt_size); + sino->si_blkadj = 0; sino->si_trunc = NULL; } + if (sino->si_blkadj) + ino_adjblks(sino); + } + } } /* @@ -1791,7 +1796,6 @@ cg_trunc(struct suj_cg *sc) static void cg_check_blk(struct suj_cg *sc) { - struct suj_ino *sino; struct suj_blk *sblk; int i; @@ -1799,15 +1803,6 @@ cg_check_blk(struct suj_cg *sc) for (i = 0; i < SUJ_HASHSIZE; i++) LIST_FOREACH(sblk, &sc->sc_blkhash[i], sb_next) blk_check(sblk); - /* -* Now that we've freed blocks which are not referenced we -* make a second pass over all inodes to adjust their block -* counts.
-*/ - for (i = 0; i < SUJ_HASHSIZE; i++) - LIST_FOREACH(sino, &sc->sc_inohash[i], si_next) - if (sino->si_blkadj) - ino_adjblks(sino); } /* @@ -1961,14 +1956,7 @@ ino_append(union jrec *rec) "parent %d, diroff %jd\n", refrec->jr_op, refrec->jr_ino, refrec->jr_nlink, refrec->jr_parent, refrec->jr_diroff); - /* -* Lookup the ino