Author: mckusick
Date: Fri Sep 22 12:45:15 2017
New Revision: 323923
URL: https://svnweb.freebsd.org/changeset/base/323923

Log:
  Continuing efforts to provide hardening of FFS, this change adds a
  check hash to cylinder groups. If a check hash fails when a cylinder
  group is read, no further allocations are attempted in that cylinder
  group until it has been fixed by fsck. This avoids a class of
  filesystem panics related to corrupted cylinder group maps. The
  hash is done using crc32c.
  
  Check hashes are added only to UFS2 and not to UFS1 as UFS1 is primarily
  used in embedded systems with small memories and low-powered processors
  which need as light-weight a filesystem as possible.
  
  Specifics of the changes:
  
  sys/sys/buf.h:
      Add BX_FSPRIV to reserve a set of eight b_xflags that may be used
      by individual filesystems for their own purpose. Their specific
      definitions are found in the header files for each filesystem
      that uses them. Also add fields to struct buf as noted below.
  
  sys/kern/vfs_bio.c:
      It is only necessary to compute a check hash for a cylinder
      group when it is actually read from disk. When calling bread,
      you do not know whether the buffer was found in the cache or
      read. So a new flag (GB_CKHASH) and a pointer to a function to
      perform the hash has been added to breadn_flags to say that the
      function should be called to calculate a hash if the data has
      been read. The check hash is placed in b_ckhash and the B_CKHASH
      flag is set to indicate that a read was done and a check hash
      calculated. Though a rather elaborate mechanism, it should
      also work for check hashing other metadata in the future. A
      kernel internal API change was to change breada into a static
      function and add flags and a function pointer to a check-hash
      function.
  
  sys/ufs/ffs/fs.h:
      Add flags for types of check hashes; stored in a new word in the
      superblock. Define corresponding BX_ flags for the different types
      of check hashes. Add a check hash word in the cylinder group.
  
  sys/ufs/ffs/ffs_alloc.c:
      In ffs_getcg do the dance with breadn_flags to get a check hash and
      if one is provided, check it.
  
  sys/ufs/ffs/ffs_vfsops.c:
      Copy across the BX_FFSTYPES flags in background writes.
      Update the check hash when writing out buffers that need them.
  
  sys/ufs/ffs/ffs_snapshot.c:
      Recompute check hash when updating snapshot cylinder groups.
  
  sys/libkern/crc32.c:
  lib/libufs/Makefile:
  lib/libufs/libufs.h:
  lib/libufs/cgroup.c:
      Include libkern/crc32.c in libufs and use it to compute check
      hashes when updating cylinder groups.
  
  Four utilities are affected:
  
  sbin/newfs/mkfs.c:
      Add the check hashes when building the cylinder groups.
  
  sbin/fsck_ffs/fsck.h:
  sbin/fsck_ffs/fsutil.c:
      Verify and update check hashes when checking and writing cylinder groups.
  
  sbin/fsck_ffs/pass5.c:
      Offer to add check hashes to existing filesystems.
      Precompute check hashes when rebuilding cylinder group
      (although this will be done when it is written in fsutil.c,
      it is necessary to do it early before comparing with the old
      cylinder group).
  
  sbin/dumpfs/dumpfs.c:
      Print out the new check hash flag(s).
  
  sbin/fsdb/Makefile:
      Needs to add libufs now used by pass5.c imported from fsck_ffs.
  
  Reviewed by: kib
  Tested by: Peter Holm (pho)

Modified:
  head/lib/libufs/Makefile
  head/lib/libufs/cgroup.c
  head/lib/libufs/libufs.h
  head/sbin/dumpfs/dumpfs.c
  head/sbin/fsck_ffs/fsutil.c
  head/sbin/fsck_ffs/pass5.c
  head/sbin/fsdb/Makefile
  head/sbin/newfs/mkfs.c
  head/sys/kern/vfs_bio.c
  head/sys/libkern/crc32.c
  head/sys/sys/buf.h
  head/sys/ufs/ffs/ffs_alloc.c
  head/sys/ufs/ffs/ffs_inode.c
  head/sys/ufs/ffs/ffs_snapshot.c
  head/sys/ufs/ffs/ffs_vfsops.c
  head/sys/ufs/ffs/ffs_vnops.c
  head/sys/ufs/ffs/fs.h

Modified: head/lib/libufs/Makefile
==============================================================================
--- head/lib/libufs/Makefile    Fri Sep 22 12:07:03 2017        (r323922)
+++ head/lib/libufs/Makefile    Fri Sep 22 12:45:15 2017        (r323923)
@@ -5,7 +5,7 @@ LIB=    ufs
 SHLIBDIR?= /lib
 SHLIB_MAJOR=   6
 
-SRCS=  block.c cgroup.c inode.c sblock.c type.c ffs_subr.c ffs_tables.c
+SRCS=  block.c cgroup.c crc32.c inode.c sblock.c type.c ffs_subr.c ffs_tables.c
 INCS=  libufs.h
 
 MAN=   bread.3 cgread.3 libufs.3 sbread.3 ufs_disk_close.3
@@ -18,7 +18,7 @@ MLINKS+= ufs_disk_close.3 ufs_disk_fillout.3
 MLINKS+= ufs_disk_close.3 ufs_disk_fillout_blank.3
 MLINKS+= ufs_disk_close.3 ufs_disk_write.3
 
-.PATH:  ${SRCTOP}/sys/ufs/ffs
+.PATH:  ${SRCTOP}/sys/libkern ${SRCTOP}/sys/ufs/ffs
 
 WARNS?=        2
 

Modified: head/lib/libufs/cgroup.c
==============================================================================
--- head/lib/libufs/cgroup.c    Fri Sep 22 12:07:03 2017        (r323922)
+++ head/lib/libufs/cgroup.c    Fri Sep 22 12:45:15 2017        (r323923)
@@ -214,6 +214,11 @@ cgwrite1(struct uufsd *disk, int c)
        struct fs *fs;
 
        fs = &disk->d_fs;
+       if ((fs->fs_metackhash & CK_CYLGRP) != 0) {
+               disk->d_cg.cg_ckhash = 0;
+               disk->d_cg.cg_ckhash =
+                   calculate_crc32c(~0L, (void *)&disk->d_cg, fs->fs_cgsize);
+       }
        if (bwrite(disk, fsbtodb(fs, cgtod(fs, c)),
            disk->d_cgunion.d_buf, fs->fs_bsize) == -1) {
                ERROR(disk, "unable to write cylinder group");

Modified: head/lib/libufs/libufs.h
==============================================================================
--- head/lib/libufs/libufs.h    Fri Sep 22 12:07:03 2017        (r323922)
+++ head/lib/libufs/libufs.h    Fri Sep 22 12:45:15 2017        (r323923)
@@ -144,6 +144,11 @@ int        ffs_isblock(struct fs *, u_char *, 
ufs1_daddr_t);
 int    ffs_isfreeblock(struct fs *, u_char *, ufs1_daddr_t);
 void   ffs_setblock(struct fs *, u_char *, ufs1_daddr_t);
 
+/*
+ * crc32.c
+ */
+uint32_t calculate_crc32c(uint32_t, const void *, size_t);
+
 __END_DECLS
 
 #endif /* __LIBUFS_H__ */

Modified: head/sbin/dumpfs/dumpfs.c
==============================================================================
--- head/sbin/dumpfs/dumpfs.c   Fri Sep 22 12:07:03 2017        (r323922)
+++ head/sbin/dumpfs/dumpfs.c   Fri Sep 22 12:45:15 2017        (r323923)
@@ -276,6 +276,24 @@ dumpfs(const char *name)
        if (fsflags != 0)
                printf("unknown flags (%#x)", fsflags);
        putchar('\n');
+       printf("check hashes\t");
+       fsflags = afs.fs_metackhash;
+       if (fsflags == 0)
+               printf("none");
+       if (fsflags & CK_SUPERBLOCK)
+               printf("superblock ");
+       if (fsflags & CK_CYLGRP)
+               printf("cylinder-groups ");
+       if (fsflags & CK_INODE)
+               printf("inodes ");
+       if (fsflags & CK_INDIR)
+               printf("indirect-blocks ");
+       if (fsflags & CK_DIR)
+               printf("directories ");
+       fsflags &= ~(CK_SUPERBLOCK | CK_CYLGRP | CK_INODE | CK_INDIR | CK_DIR);
+       if (fsflags != 0)
+               printf("unknown flags (%#x)", fsflags);
+       putchar('\n');
        printf("fsmnt\t%s\n", afs.fs_fsmnt);
        printf("volname\t%s\tswuid\t%ju\tprovidersize\t%ju\n",
                afs.fs_volname, (uintmax_t)afs.fs_swuid,

Modified: head/sbin/fsck_ffs/fsutil.c
==============================================================================
--- head/sbin/fsck_ffs/fsutil.c Fri Sep 22 12:07:03 2017        (r323922)
+++ head/sbin/fsck_ffs/fsutil.c Fri Sep 22 12:45:15 2017        (r323923)
@@ -58,6 +58,7 @@ __FBSDID("$FreeBSD$");
 #include <stdlib.h>
 #include <time.h>
 #include <unistd.h>
+#include <libufs.h>
 
 #include "fsck.h"
 
@@ -349,6 +350,20 @@ flush(int fd, struct bufarea *bp)
 
        if (!bp->b_dirty)
                return;
+       /*
+        * Calculate any needed check hashes.
+        */
+       switch (bp->b_type) {
+       case BT_CYLGRP:
+               if ((sblock.fs_metackhash & CK_CYLGRP) == 0)
+                       break;
+               bp->b_un.b_cg->cg_ckhash = 0;
+               bp->b_un.b_cg->cg_ckhash =
+                   calculate_crc32c(~0L, bp->b_un.b_buf, bp->b_size);
+               break;
+       default:
+               break;
+       }
        bp->b_dirty = 0;
        if (fswritefd < 0) {
                pfatal("WRITING IN READ_ONLY MODE.\n");

Modified: head/sbin/fsck_ffs/pass5.c
==============================================================================
--- head/sbin/fsck_ffs/pass5.c  Fri Sep 22 12:07:03 2017        (r323922)
+++ head/sbin/fsck_ffs/pass5.c  Fri Sep 22 12:45:15 2017        (r323923)
@@ -71,6 +71,13 @@ pass5(void)
        inoinfo(UFS_WINO)->ino_state = USTATE;
        memset(newcg, 0, (size_t)fs->fs_cgsize);
        newcg->cg_niblk = fs->fs_ipg;
+       if (preen == 0 && yflag == 0 && fs->fs_magic == FS_UFS2_MAGIC &&
+           fswritefd != -1 && (fs->fs_metackhash & CK_CYLGRP) == 0 &&
+           reply("ADD CYLINDER GROUP CHECKSUM PROTECTION") != 0) {
+               fs->fs_metackhash |= CK_CYLGRP;
+               rewritecg = 1;
+               sbdirty();
+       }
        if (cvtlevel >= 3) {
                if (fs->fs_maxcontig < 2 && fs->fs_contigsumsize > 0) {
                        if (preen)
@@ -305,6 +312,12 @@ pass5(void)
                                sump[run]++;
                        }
                }
+               if ((fs->fs_metackhash & CK_CYLGRP) != 0) {
+                       newcg->cg_ckhash = 0;
+                       newcg->cg_ckhash =
+                           calculate_crc32c(~0L, (void *)newcg, fs->fs_cgsize);
+               }
+
                if (bkgrdflag != 0) {
                        cstotal.cs_nffree += cg->cg_cs.cs_nffree;
                        cstotal.cs_nbfree += cg->cg_cs.cs_nbfree;

Modified: head/sbin/fsdb/Makefile
==============================================================================
--- head/sbin/fsdb/Makefile     Fri Sep 22 12:07:03 2017        (r323922)
+++ head/sbin/fsdb/Makefile     Fri Sep 22 12:45:15 2017        (r323923)
@@ -10,7 +10,7 @@ SRCS= fsdb.c fsdbutil.c \
        pass5.c setup.c utilities.c ffs_subr.c ffs_tables.c globs.c
 CFLAGS+= -I${.CURDIR:H}/fsck_ffs
 WARNS?= 2
-LIBADD=        edit
+LIBADD=        edit ufs
 .PATH: ${.CURDIR:H}/fsck_ffs ${SRCTOP}/sys/ufs/ffs
 
 .include <bsd.prog.mk>

Modified: head/sbin/newfs/mkfs.c
==============================================================================
--- head/sbin/newfs/mkfs.c      Fri Sep 22 12:07:03 2017        (r323922)
+++ head/sbin/newfs/mkfs.c      Fri Sep 22 12:45:15 2017        (r323923)
@@ -98,6 +98,7 @@ static void iput(union dinode *, ino_t);
 static int makedir(struct direct *, int);
 static void setblock(struct fs *, unsigned char *, int);
 static void wtfs(ufs2_daddr_t, int, char *);
+static void cgckhash(struct cg *);
 static u_int32_t newfs_random(void);
 
 static int
@@ -491,6 +492,11 @@ restart:
                sblock.fs_old_cstotal.cs_nifree = sblock.fs_cstotal.cs_nifree;
                sblock.fs_old_cstotal.cs_nffree = sblock.fs_cstotal.cs_nffree;
        }
+       /*
+        * Set flags for metadata that is being check-hashed.
+        */
+       if (Oflag > 1)
+               sblock.fs_metackhash = CK_CYLGRP;
 
        /*
         * Dump out summary information about file system.
@@ -791,6 +797,7 @@ initcg(int cylno, time_t utime)
                }
        }
        *cs = acg.cg_cs;
+       cgckhash(&acg);
        /*
         * Write out the duplicate super block, the cylinder group map
         * and two blocks worth of inodes in a single write.
@@ -1006,6 +1013,7 @@ goth:
                        setbit(cg_blksfree(&acg), d + i);
        }
        /* XXX cgwrite(&disk, 0)??? */
+       cgckhash(&acg);
        wtfs(fsbtodb(&sblock, cgtod(&sblock, 0)), sblock.fs_cgsize,
            (char *)&acg);
        return ((ufs2_daddr_t)d);
@@ -1027,6 +1035,7 @@ iput(union dinode *ip, ino_t ino)
        }
        acg.cg_cs.cs_nifree--;
        setbit(cg_inosused(&acg), ino);
+       cgckhash(&acg);
        wtfs(fsbtodb(&sblock, cgtod(&sblock, 0)), sblock.fs_cgsize,
            (char *)&acg);
        sblock.fs_cstotal.cs_nifree--;
@@ -1057,6 +1066,20 @@ wtfs(ufs2_daddr_t bno, int size, char *bf)
                return;
        if (bwrite(&disk, part_ofs + bno, bf, size) < 0)
                err(36, "wtfs: %d bytes at sector %jd", size, (intmax_t)bno);
+}
+
+/*
+ * Calculate the check-hash of the cylinder group.
+ */
+static void
+cgckhash(cgp)
+       struct cg *cgp;
+{
+
+       if ((sblock.fs_metackhash & CK_CYLGRP) == 0)
+               return;
+       cgp->cg_ckhash = 0;
+       cgp->cg_ckhash = calculate_crc32c(~0L, (void *)cgp, sblock.fs_cgsize);
 }
 
 /*

Modified: head/sys/kern/vfs_bio.c
==============================================================================
--- head/sys/kern/vfs_bio.c     Fri Sep 22 12:07:03 2017        (r323922)
+++ head/sys/kern/vfs_bio.c     Fri Sep 22 12:45:15 2017        (r323923)
@@ -119,6 +119,8 @@ static void vfs_vmio_truncate(struct buf *bp, int npag
 static void vfs_vmio_extend(struct buf *bp, int npages, int size);
 static int vfs_bio_clcheck(struct vnode *vp, int size,
                daddr_t lblkno, daddr_t blkno);
+static void breada(struct vnode *, daddr_t *, int *, int, struct ucred *, int,
+               void (*)(struct buf *));
 static int buf_flush(struct vnode *vp, int);
 static int buf_recycle(bool);
 static int buf_scan(bool);
@@ -1783,15 +1785,14 @@ bufkva_reclaim(vmem_t *vmem, int flags)
        return;
 }
 
-
 /*
  * Attempt to initiate asynchronous I/O on read-ahead blocks.  We must
  * clear BIO_ERROR and B_INVAL prior to initiating I/O . If B_CACHE is set,
  * the buffer is valid and we do not have to do anything.
  */
-void
-breada(struct vnode * vp, daddr_t * rablkno, int * rabsize,
-    int cnt, struct ucred * cred)
+static void
+breada(struct vnode * vp, daddr_t * rablkno, int * rabsize, int cnt,
+    struct ucred * cred, int flags, void (*ckhashfunc)(struct buf *))
 {
        struct buf *rabp;
        int i;
@@ -1800,31 +1801,34 @@ breada(struct vnode * vp, daddr_t * rablkno, int * rab
                if (inmem(vp, *rablkno))
                        continue;
                rabp = getblk(vp, *rablkno, *rabsize, 0, 0, 0);
-
-               if ((rabp->b_flags & B_CACHE) == 0) {
-                       if (!TD_IS_IDLETHREAD(curthread)) {
+               if ((rabp->b_flags & B_CACHE) != 0) {
+                       brelse(rabp);
+                       continue;
+               }
+               if (!TD_IS_IDLETHREAD(curthread)) {
 #ifdef RACCT
-                               if (racct_enable) {
-                                       PROC_LOCK(curproc);
-                                       racct_add_buf(curproc, rabp, 0);
-                                       PROC_UNLOCK(curproc);
-                               }
-#endif /* RACCT */
-                               curthread->td_ru.ru_inblock++;
+                       if (racct_enable) {
+                               PROC_LOCK(curproc);
+                               racct_add_buf(curproc, rabp, 0);
+                               PROC_UNLOCK(curproc);
                        }
-                       rabp->b_flags |= B_ASYNC;
-                       rabp->b_flags &= ~B_INVAL;
-                       rabp->b_ioflags &= ~BIO_ERROR;
-                       rabp->b_iocmd = BIO_READ;
-                       if (rabp->b_rcred == NOCRED && cred != NOCRED)
-                               rabp->b_rcred = crhold(cred);
-                       vfs_busy_pages(rabp, 0);
-                       BUF_KERNPROC(rabp);
-                       rabp->b_iooffset = dbtob(rabp->b_blkno);
-                       bstrategy(rabp);
-               } else {
-                       brelse(rabp);
+#endif /* RACCT */
+                       curthread->td_ru.ru_inblock++;
                }
+               rabp->b_flags |= B_ASYNC;
+               rabp->b_flags &= ~B_INVAL;
+               if ((flags & GB_CKHASH) != 0) {
+                       rabp->b_flags |= B_CKHASH;
+                       rabp->b_ckhashcalc = ckhashfunc;
+               }
+               rabp->b_ioflags &= ~BIO_ERROR;
+               rabp->b_iocmd = BIO_READ;
+               if (rabp->b_rcred == NOCRED && cred != NOCRED)
+                       rabp->b_rcred = crhold(cred);
+               vfs_busy_pages(rabp, 0);
+               BUF_KERNPROC(rabp);
+               rabp->b_iooffset = dbtob(rabp->b_blkno);
+               bstrategy(rabp);
        }
 }
 
@@ -1840,10 +1844,11 @@ breada(struct vnode * vp, daddr_t * rablkno, int * rab
  */
 int
 breadn_flags(struct vnode *vp, daddr_t blkno, int size, daddr_t *rablkno,
-    int *rabsize, int cnt, struct ucred *cred, int flags, struct buf **bpp)
+    int *rabsize, int cnt, struct ucred *cred, int flags,
+    void (*ckhashfunc)(struct buf *), struct buf **bpp)
 {
        struct buf *bp;
-       int rv = 0, readwait = 0;
+       int readwait, rv;
 
        CTR3(KTR_BUF, "breadn(%p, %jd, %d)", vp, blkno, size);
        /*
@@ -1853,7 +1858,10 @@ breadn_flags(struct vnode *vp, daddr_t blkno, int size
        if (bp == NULL)
                return (EBUSY);
 
-       /* if not found in cache, do some I/O */
+       /*
+        * If not found in cache, do some I/O
+        */
+       readwait = 0;
        if ((bp->b_flags & B_CACHE) == 0) {
                if (!TD_IS_IDLETHREAD(curthread)) {
 #ifdef RACCT
@@ -1867,6 +1875,10 @@ breadn_flags(struct vnode *vp, daddr_t blkno, int size
                }
                bp->b_iocmd = BIO_READ;
                bp->b_flags &= ~B_INVAL;
+               if ((flags & GB_CKHASH) != 0) {
+                       bp->b_flags |= B_CKHASH;
+                       bp->b_ckhashcalc = ckhashfunc;
+               }
                bp->b_ioflags &= ~BIO_ERROR;
                if (bp->b_rcred == NOCRED && cred != NOCRED)
                        bp->b_rcred = crhold(cred);
@@ -1876,8 +1888,12 @@ breadn_flags(struct vnode *vp, daddr_t blkno, int size
                ++readwait;
        }
 
-       breada(vp, rablkno, rabsize, cnt, cred);
+       /*
+        * Attempt to initiate asynchronous I/O on read-ahead blocks.
+        */
+       breada(vp, rablkno, rabsize, cnt, cred, flags, ckhashfunc);
 
+       rv = 0;
        if (readwait) {
                rv = bufwait(bp);
                if (rv != 0) {
@@ -4048,6 +4064,10 @@ bufdone(struct buf *bp)
        runningbufwakeup(bp);
        if (bp->b_iocmd == BIO_WRITE)
                dropobj = bp->b_bufobj;
+       else if ((bp->b_flags & B_CKHASH) != 0) {
+               KASSERT(buf_mapped(bp), ("biodone: bp %p not mapped", bp));
+               (*bp->b_ckhashcalc)(bp);
+       }
        /* call optional completion function if requested */
        if (bp->b_iodone != NULL) {
                biodone = bp->b_iodone;

Modified: head/sys/libkern/crc32.c
==============================================================================
--- head/sys/libkern/crc32.c    Fri Sep 22 12:07:03 2017        (r323922)
+++ head/sys/libkern/crc32.c    Fri Sep 22 12:45:15 2017        (r323923)
@@ -46,6 +46,8 @@
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
+
+#ifdef _KERNEL
 #include <sys/libkern.h>
 #include <sys/systm.h>
 
@@ -57,6 +59,7 @@ __FBSDID("$FreeBSD$");
 #if defined(__aarch64__)
 #include <machine/cpu.h>
 #endif
+#endif /* _KERNEL */
 
 const uint32_t crc32_tab[] = {
        0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f,
@@ -759,6 +762,7 @@ calculate_crc32c(uint32_t crc32c,
     const unsigned char *buffer,
     unsigned int length)
 {
+#ifdef _KERNEL
 #if defined(__amd64__) || defined(__i386__)
        if ((cpu_feature2 & CPUID2_SSE42) != 0) {
                return (sse42_crc32c(crc32c, buffer, length));
@@ -776,6 +780,7 @@ calculate_crc32c(uint32_t crc32c,
                return (armv8_crc32c(crc32c, buffer, length));
        } else
 #endif
+#endif /* _KERNEL */
        if (length < 4) {
                return (singletable_crc32c(crc32c, buffer, length));
        } else {

Modified: head/sys/sys/buf.h
==============================================================================
--- head/sys/sys/buf.h  Fri Sep 22 12:07:03 2017        (r323922)
+++ head/sys/sys/buf.h  Fri Sep 22 12:45:15 2017        (r323923)
@@ -70,7 +70,7 @@ extern struct bio_ops {
 struct vm_object;
 struct vm_page;
 
-typedef unsigned char b_xflags_t;
+typedef uint32_t b_xflags_t;
 
 /*
  * The buffer header describes an I/O operation in the kernel.
@@ -104,6 +104,8 @@ struct buf {
        off_t           b_iooffset;
        long            b_resid;
        void    (*b_iodone)(struct buf *);
+       void    (*b_ckhashcalc)(struct buf *);
+       uint64_t        b_ckhash;       /* B_CKHASH requested check-hash */
        daddr_t b_blkno;                /* Underlying physical block number. */
        off_t   b_offset;               /* Offset into file. */
        TAILQ_ENTRY(buf) b_bobufs;      /* (V) Buffer's associated vnode. */
@@ -209,7 +211,7 @@ struct buf {
 #define        B_CACHE         0x00000020      /* Bread found us in the cache. 
*/
 #define        B_VALIDSUSPWRT  0x00000040      /* Valid write during 
suspension. */
 #define        B_DELWRI        0x00000080      /* Delay I/O until buffer 
reused. */
-#define        B_00000100      0x00000100      /* Available flag. */
+#define        B_CKHASH        0x00000100      /* checksum hash calculated on 
read */
 #define        B_DONE          0x00000200      /* I/O completed. */
 #define        B_EINTR         0x00000400      /* I/O was interrupted */
 #define        B_NOREUSE       0x00000800      /* Contents not reused once 
released. */
@@ -242,12 +244,17 @@ struct buf {
 
 /*
  * These flags are kept in b_xflags.
+ *
+ * BX_FSPRIV reserves a set of eight flags that may be used by individual
+ * filesystems for their own purpose. Their specific definitions are
+ * found in the header files for each filesystem that uses them.
  */
 #define        BX_VNDIRTY      0x00000001      /* On vnode dirty list */
 #define        BX_VNCLEAN      0x00000002      /* On vnode clean list */
 #define        BX_BKGRDWRITE   0x00000010      /* Do writes in background */
 #define BX_BKGRDMARKER 0x00000020      /* Mark buffer for splay tree */
 #define        BX_ALTDATA      0x00000040      /* Holds extended data */
+#define        BX_FSPRIV       0x00FF0000      /* filesystem-specific flags 
mask */
 
 #define        PRINT_BUF_XFLAGS 
"\20\7altdata\6bkgrdmarker\5bkgrdwrite\2clean\1dirty"
 
@@ -467,6 +474,7 @@ buf_track(struct buf *bp, const char *location)
 #define        GB_NOWAIT_BD    0x0004          /* Do not wait for bufdaemon. */
 #define        GB_UNMAPPED     0x0008          /* Do not mmap buffer pages. */
 #define        GB_KVAALLOC     0x0010          /* But allocate KVA. */
+#define        GB_CKHASH       0x0020          /* If reading, calc checksum 
hash */
 
 #ifdef _KERNEL
 extern int     nbuf;                   /* The number of buffer headers */
@@ -504,15 +512,15 @@ int       buf_dirty_count_severe(void);
 void   bremfree(struct buf *);
 void   bremfreef(struct buf *);        /* XXX Force bremfree, only for nfs. */
 #define bread(vp, blkno, size, cred, bpp) \
-           breadn_flags(vp, blkno, size, NULL, NULL, 0, cred, 0, bpp)
+           breadn_flags(vp, blkno, size, NULL, NULL, 0, cred, 0, NULL, bpp)
 #define bread_gb(vp, blkno, size, cred, gbflags, bpp) \
            breadn_flags(vp, blkno, size, NULL, NULL, 0, cred, \
-               gbflags, bpp)
+               gbflags, NULL, bpp)
 #define breadn(vp, blkno, size, rablkno, rabsize, cnt, cred, bpp) \
-           breadn_flags(vp, blkno, size, rablkno, rabsize, cnt, cred, 0, bpp)
+           breadn_flags(vp, blkno, size, rablkno, rabsize, cnt, cred, \
+               0, NULL, bpp)
 int    breadn_flags(struct vnode *, daddr_t, int, daddr_t *, int *, int,
-           struct ucred *, int, struct buf **);
-void   breada(struct vnode *, daddr_t *, int *, int, struct ucred *);
+           struct ucred *, int, void (*)(struct buf *), struct buf **);
 void   bdwrite(struct buf *);
 void   bawrite(struct buf *);
 void   babarrierwrite(struct buf *);

Modified: head/sys/ufs/ffs/ffs_alloc.c
==============================================================================
--- head/sys/ufs/ffs/ffs_alloc.c        Fri Sep 22 12:07:03 2017        
(r323922)
+++ head/sys/ufs/ffs/ffs_alloc.c        Fri Sep 22 12:45:15 2017        
(r323923)
@@ -123,6 +123,7 @@ static ufs2_daddr_t ffs_nodealloccg(struct inode *, u_
 static ufs1_daddr_t ffs_mapsearch(struct fs *, struct cg *, ufs2_daddr_t, int);
 static int     ffs_reallocblks_ufs1(struct vop_reallocblks_args *);
 static int     ffs_reallocblks_ufs2(struct vop_reallocblks_args *);
+static void    ffs_ckhash_cg(struct buf *);
 
 /*
  * Allocate a block in the filesystem.
@@ -2596,25 +2597,53 @@ ffs_getcg(fs, devvp, cg, bpp, cgpp)
 {
        struct buf *bp;
        struct cg *cgp;
-       int error;
+       int flags, error;
 
        *bpp = NULL;
        *cgpp = NULL;
-       error = bread(devvp, devvp->v_type == VREG ?
+       flags = 0;
+       if ((fs->fs_metackhash & CK_CYLGRP) != 0)
+               flags |= GB_CKHASH;
+       error = breadn_flags(devvp, devvp->v_type == VREG ?
            fragstoblks(fs, cgtod(fs, cg)) : fsbtodb(fs, cgtod(fs, cg)),
-           (int)fs->fs_cgsize, NOCRED, &bp);
+           (int)fs->fs_cgsize, NULL, NULL, 0, NOCRED, flags,
+           ffs_ckhash_cg, &bp);
        if (error != 0)
                return (error);
        cgp = (struct cg *)bp->b_data;
-       if (!cg_chkmagic(cgp) || cgp->cg_cgx != cg) {
+       if (((fs->fs_metackhash & CK_CYLGRP) != 0 &&
+           (bp->b_flags & B_CKHASH) != 0 &&
+           cgp->cg_ckhash != bp->b_ckhash) ||
+           !cg_chkmagic(cgp) || cgp->cg_cgx != cg) {
+               printf("checksum failed: cg %u, cgp: 0x%x != bp: 0x%lx\n",
+                   cg, cgp->cg_ckhash, bp->b_ckhash);
+               bp->b_flags &= ~B_CKHASH;
+               bp->b_flags |= B_INVAL | B_NOCACHE;
                brelse(bp);
                return (EIO);
        }
+       bp->b_flags &= ~B_CKHASH;
        bp->b_xflags |= BX_BKGRDWRITE;
+       if ((fs->fs_metackhash & CK_CYLGRP) != 0)
+               bp->b_xflags |= BX_CYLGRP;
        cgp->cg_old_time = cgp->cg_time = time_second;
        *bpp = bp;
        *cgpp = cgp;
        return (0);
+}
+
+static void
+ffs_ckhash_cg(bp)
+       struct buf *bp;
+{
+       uint32_t ckhash;
+       struct cg *cgp;
+
+       cgp = (struct cg *)bp->b_data;
+       ckhash = cgp->cg_ckhash;
+       cgp->cg_ckhash = 0;
+       bp->b_ckhash = calculate_crc32c(~0L, bp->b_data, bp->b_bcount);
+       cgp->cg_ckhash = ckhash;
 }
 
 /*

Modified: head/sys/ufs/ffs/ffs_inode.c
==============================================================================
--- head/sys/ufs/ffs/ffs_inode.c        Fri Sep 22 12:07:03 2017        
(r323922)
+++ head/sys/ufs/ffs/ffs_inode.c        Fri Sep 22 12:45:15 2017        
(r323923)
@@ -110,9 +110,9 @@ ffs_update(vp, waitfor)
        if (IS_SNAPSHOT(ip))
                flags = GB_LOCK_NOWAIT;
 loop:
-       error = breadn_flags(ITODEVVP(ip),
+       error = bread_gb(ITODEVVP(ip),
             fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
-            (int) fs->fs_bsize, 0, 0, 0, NOCRED, flags, &bp);
+            (int) fs->fs_bsize, NOCRED, flags, &bp);
        if (error != 0) {
                if (error != EBUSY)
                        return (error);

Modified: head/sys/ufs/ffs/ffs_snapshot.c
==============================================================================
--- head/sys/ufs/ffs/ffs_snapshot.c     Fri Sep 22 12:07:03 2017        
(r323922)
+++ head/sys/ufs/ffs/ffs_snapshot.c     Fri Sep 22 12:45:15 2017        
(r323923)
@@ -927,7 +927,7 @@ cgaccount(cg, vp, nbp, passno)
        error = UFS_BALLOC(vp, lblktosize(fs, (off_t)(base + loc)),
            fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp);
        if (error) {
-               return (error);
+               goto out;
        }
        indiroff = (base + loc - UFS_NDADDR) % NINDIR(fs);
        for ( ; loc < len; loc++, indiroff++) {
@@ -939,7 +939,7 @@ cgaccount(cg, vp, nbp, passno)
                            lblktosize(fs, (off_t)(base + loc)),
                            fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp);
                        if (error) {
-                               return (error);
+                               goto out;
                        }
                        indiroff = 0;
                }
@@ -967,7 +967,21 @@ cgaccount(cg, vp, nbp, passno)
        if (passno == 2)
                ibp->b_flags |= B_VALIDSUSPWRT;
        bdwrite(ibp);
-       return (0);
+out:
+       /*
+        * We have to calculate the crc32c here rather than just setting the
+        * BX_CYLGRP b_xflags because the allocation of the block for the
+        * the cylinder group map will always be a full size block (fs_bsize)
+        * even though the cylinder group may be smaller (fs_cgsize). The
+        * crc32c must be computed only over fs_cgsize whereas the BX_CYLGRP
+        * flag causes it to be computed over the size of the buffer.
+        */
+       if ((fs->fs_metackhash & CK_CYLGRP) != 0) {
+               ((struct cg *)nbp->b_data)->cg_ckhash = 0;
+               ((struct cg *)nbp->b_data)->cg_ckhash =
+                   calculate_crc32c(~0L, nbp->b_data, fs->fs_cgsize);
+       }
+       return (error);
 }
 
 /*

Modified: head/sys/ufs/ffs/ffs_vfsops.c
==============================================================================
--- head/sys/ufs/ffs/ffs_vfsops.c       Fri Sep 22 12:07:03 2017        
(r323922)
+++ head/sys/ufs/ffs/ffs_vfsops.c       Fri Sep 22 12:45:15 2017        
(r323923)
@@ -838,7 +838,11 @@ ffs_mountfs(devvp, mp, td)
                goto out;
        }
        fs->fs_fmod = 0;
-       fs->fs_flags &= ~FS_INDEXDIRS;  /* no support for directory indices */
+       /* none of these types of check-hashes are maintained */
+       fs->fs_metackhash &= ~(CK_SUPERBLOCK | CK_INODE | CK_INDIR | CK_DIR);
+       /* no support for directory indices or any other undefined flags */
+       fs->fs_flags &= ~FS_INDEXDIRS;
+       fs->fs_flags &= FS_SUPPORTED;
        fs->fs_flags &= ~FS_UNCLEAN;
        if (fs->fs_clean == 0) {
                fs->fs_flags |= FS_UNCLEAN;
@@ -2150,7 +2154,8 @@ ffs_bufwrite(struct buf *bp)
                BO_LOCK(bp->b_bufobj);
                bp->b_vflags |= BV_BKGRDINPROG;
                BO_UNLOCK(bp->b_bufobj);
-               newbp->b_xflags |= BX_BKGRDMARKER;
+               newbp->b_xflags |=
+                   (bp->b_xflags & BX_FSPRIV) | BX_BKGRDMARKER;
                newbp->b_lblkno = bp->b_lblkno;
                newbp->b_blkno = bp->b_blkno;
                newbp->b_offset = bp->b_offset;
@@ -2194,9 +2199,8 @@ static void
 ffs_geom_strategy(struct bufobj *bo, struct buf *bp)
 {
        struct vnode *vp;
-       int error;
        struct buf *tbp;
-       int nocopy;
+       int error, nocopy;
 
        vp = bo2vnode(bo);
        if (bp->b_iocmd == BIO_WRITE) {
@@ -2247,6 +2251,32 @@ ffs_geom_strategy(struct bufobj *bo, struct buf *bp)
                }
 
 #endif
+               /*
+                * Check for metadata that needs check-hashes and update them.
+                */
+               switch (bp->b_xflags & BX_FSPRIV) {
+               case BX_CYLGRP:
+                       ((struct cg *)bp->b_data)->cg_ckhash = 0;
+                       ((struct cg *)bp->b_data)->cg_ckhash =
+                           calculate_crc32c(~0L, bp->b_data, bp->b_bcount);
+                       break;
+
+               case BX_SUPERBLOCK:
+               case BX_INODE:
+               case BX_INDIR:
+               case BX_DIR:
+                       printf("Check-hash write is unimplemented!!!\n");
+                       break;
+
+               case 0:
+                       break;
+
+               default:
+                       printf("multiple buffer types 0x%b\n",
+                           (u_int)(bp->b_xflags & BX_FSPRIV),
+                           PRINT_UFS_BUF_XFLAGS);
+                       break;
+               }
        }
        g_vfs_strategy(bo, bp);
 }

Modified: head/sys/ufs/ffs/ffs_vnops.c
==============================================================================
--- head/sys/ufs/ffs/ffs_vnops.c        Fri Sep 22 12:07:03 2017        (r323922)
+++ head/sys/ufs/ffs/ffs_vnops.c        Fri Sep 22 12:45:15 2017        (r323923)
@@ -587,7 +587,7 @@ ffs_read(ap)
                         */
                        u_int nextsize = blksize(fs, ip, nextlbn);
                        error = breadn_flags(vp, lbn, size, &nextlbn,
-                           &nextsize, 1, NOCRED, GB_UNMAPPED, &bp);
+                           &nextsize, 1, NOCRED, GB_UNMAPPED, NULL, &bp);
                } else {
                        /*
                         * Failing all of the above, just read what the

Modified: head/sys/ufs/ffs/fs.h
==============================================================================
--- head/sys/ufs/ffs/fs.h       Fri Sep 22 12:07:03 2017        (r323922)
+++ head/sys/ufs/ffs/fs.h       Fri Sep 22 12:45:15 2017        (r323923)
@@ -361,7 +361,8 @@ struct fs {
        int32_t  fs_save_cgsize;        /* save real cg size to use fs_bsize */
        ufs_time_t fs_mtime;            /* Last mount or fsck time. */
        int32_t  fs_sujfree;            /* SUJ free list */
-       int32_t  fs_sparecon32[23];     /* reserved for future constants */
+       int32_t  fs_sparecon32[22];     /* reserved for future constants */
+       u_int32_t fs_metackhash;        /* metadata check-hash, see CK_ below */
        int32_t  fs_flags;              /* see FS_ flags below */
        int32_t  fs_contigsumsize;      /* size of cluster summary array */ 
        int32_t  fs_maxsymlinklen;      /* max length of an internal symlink */
@@ -388,7 +389,6 @@ CTASSERT(sizeof(struct fs) == 1376);
 #define        FS_UFS1_MAGIC   0x011954        /* UFS1 fast filesystem magic number */
 #define        FS_UFS2_MAGIC   0x19540119      /* UFS2 fast filesystem magic number */
 #define        FS_BAD_MAGIC    0x19960408      /* UFS incomplete newfs magic number */
-#define        FS_OKAY         0x7c269d38      /* superblock checksum */
 #define        FS_42INODEFMT   -1              /* 4.2BSD inode format */
 #define        FS_44INODEFMT   2               /* 4.4BSD inode format */
 
@@ -415,7 +415,11 @@ CTASSERT(sizeof(struct fs) == 1376);
  * on-disk auxiliary indexes (such as B-trees) for speeding directory
  * accesses. Kernels that do not support auxiliary indices clear the
  * flag to indicate that the indices need to be rebuilt (by fsck) before
- * they can be used.
+ * they can be used. When a filesystem is mounted, any flags not
+ * included in FS_SUPPORTED are cleared. This lets newer features
+ * know that the filesystem has been mounted by a kernel running an
+ * older version of the filesystem code and thus that data structures
+ * associated with those features are out-of-date and need to be rebuilt.
  *
  * FS_ACLS indicates that POSIX.1e ACLs are administratively enabled
  * for the file system, so they should be loaded from extended attributes,
@@ -437,8 +441,30 @@ CTASSERT(sizeof(struct fs) == 1376);
 #define        FS_NFS4ACLS     0x0100  /* file system has NFSv4 ACLs enabled */
 #define        FS_INDEXDIRS    0x0200  /* kernel supports indexed directories */
 #define        FS_TRIM         0x0400  /* issue BIO_DELETE for deleted blocks */
+#define        FS_SUPPORTED    0xFFFF  /* supported flags, others cleared at mount */
 
 /*
+ * The fs_metackhash field indicates the types of metadata check-hash
+ * that are maintained for a filesystem. Not all filesystems check-hash
+ * all metadata.
+ */
+#define        CK_SUPERBLOCK   0x0001  /* the superblock */
+#define        CK_CYLGRP       0x0002  /* the cylinder groups */
+#define        CK_INODE        0x0004  /* inodes */
+#define        CK_INDIR        0x0008  /* indirect blocks */
+#define        CK_DIR          0x0010  /* directory contents */
+/*
+ * The BX_FSPRIV buffer b_xflags are used to track types of data in buffers.
+ */
+#define        BX_SUPERBLOCK   0x00010000      /* superblock */
+#define        BX_CYLGRP       0x00020000      /* cylinder groups */
+#define        BX_INODE        0x00040000      /* inodes */
+#define        BX_INDIR        0x00080000      /* indirect blocks */
+#define        BX_DIR          0x00100000      /* directory contents */
+
+#define        PRINT_UFS_BUF_XFLAGS "\20\25dir\24indir\23inode\22cylgrp\21superblock"
+
+/*
  * Macros to access bits in the fs_active array.
  */
 #define        ACTIVECGNUM(fs, cg)     ((fs)->fs_active[(cg) / (NBBY * sizeof(int))])
@@ -506,7 +532,8 @@ struct cg {
        u_int32_t cg_niblk;             /* number of inode blocks this cg */
        u_int32_t cg_initediblk;                /* last initialized inode */
        u_int32_t cg_unrefs;            /* number of unreferenced inodes */
-       int32_t  cg_sparecon32[2];      /* reserved for future use */
+       int32_t  cg_sparecon32[1];      /* reserved for future use */
+       u_int32_t cg_ckhash;            /* check-hash of this cg */
        ufs_time_t cg_time;             /* time last written */
        int64_t  cg_sparecon64[3];      /* reserved for future use */
        u_int8_t cg_space[1];           /* space for cylinder group maps */
_______________________________________________
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to