Author: mckusick
Date: Mon Oct 17 21:49:54 2016
New Revision: 307534
URL: https://svnweb.freebsd.org/changeset/base/307534

Log:
  MFC r304230:
  Add two new macros, SLIST_CONCAT and LIST_CONCAT.
  
  MFC r304239:
  Bug 211013 reports that a write error to a UFS filesystem running
  with softupdates panics the kernel.
  
  PR: 211013

Modified:
  stable/10/share/man/man3/queue.3
  stable/10/sys/sys/queue.h
  stable/10/sys/ufs/ffs/ffs_softdep.c
  stable/10/sys/ufs/ffs/softdep.h
Directory Properties:
  stable/10/   (props changed)

Modified: stable/10/share/man/man3/queue.3
==============================================================================
--- stable/10/share/man/man3/queue.3    Mon Oct 17 21:44:41 2016        
(r307533)
+++ stable/10/share/man/man3/queue.3    Mon Oct 17 21:49:54 2016        
(r307534)
@@ -32,12 +32,13 @@
 .\"    @(#)queue.3     8.2 (Berkeley) 1/24/94
 .\" $FreeBSD$
 .\"
-.Dd June 24, 2015
+.Dd August 15, 2016
 .Dt QUEUE 3
 .Os
 .Sh NAME
 .Nm SLIST_CLASS_ENTRY ,
 .Nm SLIST_CLASS_HEAD ,
+.Nm SLIST_CONCAT ,
 .Nm SLIST_EMPTY ,
 .Nm SLIST_ENTRY ,
 .Nm SLIST_FIRST ,
@@ -79,6 +80,7 @@
 .Nm STAILQ_SWAP ,
 .Nm LIST_CLASS_ENTRY ,
 .Nm LIST_CLASS_HEAD ,
+.Nm LIST_CONCAT ,
 .Nm LIST_EMPTY ,
 .Nm LIST_ENTRY ,
 .Nm LIST_FIRST ,
@@ -129,6 +131,7 @@ lists and tail queues
 .\"
 .Fn SLIST_CLASS_ENTRY "CLASSTYPE"
 .Fn SLIST_CLASS_HEAD "HEADNAME" "CLASSTYPE"
+.Fn SLIST_CONCAT "SLIST_HEAD *head1" "SLIST_HEAD *head2" "TYPE" "SLIST_ENTRY 
NAME"
 .Fn SLIST_EMPTY "SLIST_HEAD *head"
 .Fn SLIST_ENTRY "TYPE"
 .Fn SLIST_FIRST "SLIST_HEAD *head"
@@ -172,6 +175,7 @@ lists and tail queues
 .\"
 .Fn LIST_CLASS_ENTRY "CLASSTYPE"
 .Fn LIST_CLASS_HEAD "HEADNAME" "CLASSTYPE"
+.Fn LIST_CONCAT "LIST_HEAD *head1" "LIST_HEAD *head2" "TYPE" "LIST_ENTRY NAME"
 .Fn LIST_EMPTY "LIST_HEAD *head"
 .Fn LIST_ENTRY "TYPE"
 .Fn LIST_FIRST "LIST_HEAD *head"
@@ -253,6 +257,8 @@ Singly-linked lists add the following fu
 .Bl -enum -compact -offset indent
 .It
 O(n) removal of any entry in the list.
+.It
+O(n) concatenation of two lists.
 .El
 .Pp
 Singly-linked tail queues add the following functionality:
@@ -300,6 +306,8 @@ Linked lists are the simplest of the dou
 They add the following functionality over the above:
 .Bl -enum -compact -offset indent
 .It
+O(n) concatenation of two lists.
+.It
 They may be traversed backwards.
 .El
 However:
@@ -405,6 +413,19 @@ evaluates to an initializer for the list
 .Fa head .
 .Pp
 The macro
+.Nm SLIST_CONCAT
+concatenates the list headed by
+.Fa head2
+onto the end of the one headed by
+.Fa head1
+removing all entries from the former.
+Use of this macro should be avoided as it traverses the entirety of the
+.Fa head1
+list.
+A singly-linked tail queue should be used if this macro is needed in
+high-usage code paths or to operate on long lists.
+.Pp
+The macro
 .Nm SLIST_EMPTY
 evaluates to true if there are no elements in the list.
 .Pp
@@ -512,6 +533,9 @@ The macro
 removes the element
 .Fa elm
 from the list.
+Use of this macro should be avoided as it traverses the entire list.
+A doubly-linked list should be used if this macro is needed in
+high-usage code paths or to operate on long lists.
 .Pp
 The macro
 .Nm SLIST_SWAP
@@ -728,6 +752,9 @@ The macro
 removes the element
 .Fa elm
 from the tail queue.
+Use of this macro should be avoided as it traverses the entire list.
+A doubly-linked tail queue should be used if this macro is needed in
+high-usage code paths or to operate on long tail queues.
 .Pp
 The macro
 .Nm STAILQ_SWAP
@@ -827,6 +854,19 @@ evaluates to an initializer for the list
 .Fa head .
 .Pp
 The macro
+.Nm LIST_CONCAT
+concatenates the list headed by
+.Fa head2
+onto the end of the one headed by
+.Fa head1
+removing all entries from the former.
+Use of this macro should be avoided as it traverses the entirety of the
+.Fa head1
+list.
+A tail queue should be used if this macro is needed in
+high-usage code paths or to operate on long lists.
+.Pp
+The macro
 .Nm LIST_EMPTY
 evaluates to true if there are no elements in the list.
 .Pp

Modified: stable/10/sys/sys/queue.h
==============================================================================
--- stable/10/sys/sys/queue.h   Mon Oct 17 21:44:41 2016        (r307533)
+++ stable/10/sys/sys/queue.h   Mon Oct 17 21:49:54 2016        (r307534)
@@ -76,6 +76,10 @@
  *
  * For details on the use of these macros, see the queue(3) manual page.
  *
+ * Below is a summary of implemented functions where:
+ *  +  means the macro is available
+ *  -  means the macro is not available
+ *  s  means the macro is available but is slow (runs in O(n) time)
  *
  *                             SLIST   LIST    STAILQ  TAILQ
  * _HEAD                       +       +       +       +
@@ -101,10 +105,10 @@
  * _INSERT_BEFORE              -       +       -       +
  * _INSERT_AFTER               +       +       +       +
  * _INSERT_TAIL                        -       -       +       +
- * _CONCAT                     -       -       +       +
+ * _CONCAT                     s       s       +       +
  * _REMOVE_AFTER               +       -       +       -
  * _REMOVE_HEAD                        +       -       +       -
- * _REMOVE                     +       +       +       +
+ * _REMOVE                     s       +       s       +
  * _SWAP                       +       +       +       +
  *
  */
@@ -183,6 +187,19 @@ struct {                                                   
        \
 /*
  * Singly-linked List functions.
  */
+#define SLIST_CONCAT(head1, head2, type, field) do {                   \
+       QUEUE_TYPEOF(type) *curelm = SLIST_FIRST(head1);                \
+       if (curelm == NULL) {                                           \
+               if ((SLIST_FIRST(head1) = SLIST_FIRST(head2)) != NULL)  \
+                       SLIST_INIT(head2);                              \
+       } else if (SLIST_FIRST(head2) != NULL) {                        \
+               while (SLIST_NEXT(curelm, field) != NULL)               \
+                       curelm = SLIST_NEXT(curelm, field);             \
+               SLIST_NEXT(curelm, field) = SLIST_FIRST(head2);         \
+               SLIST_INIT(head2);                                      \
+       }                                                               \
+} while (0)
+
 #define        SLIST_EMPTY(head)       ((head)->slh_first == NULL)
 
 #define        SLIST_FIRST(head)       ((head)->slh_first)
@@ -447,6 +464,23 @@ struct {                                                   
        \
 #define        QMD_LIST_CHECK_PREV(elm, field)
 #endif /* (_KERNEL && INVARIANTS) */
 
+#define LIST_CONCAT(head1, head2, type, field) do {                          \
+       QUEUE_TYPEOF(type) *curelm = LIST_FIRST(head1);                       \
+       if (curelm == NULL) {                                                 \
+               if ((LIST_FIRST(head1) = LIST_FIRST(head2)) != NULL) {        \
+                       LIST_FIRST(head2)->field.le_prev =                    \
+                           &LIST_FIRST((head1));                             \
+                       LIST_INIT(head2);                                     \
+               }                                                             \
+       } else if (LIST_FIRST(head2) != NULL) {                               \
+               while (LIST_NEXT(curelm, field) != NULL)                      \
+                       curelm = LIST_NEXT(curelm, field);                    \
+               LIST_NEXT(curelm, field) = LIST_FIRST(head2);                 \
+               LIST_FIRST(head2)->field.le_prev = &LIST_NEXT(curelm, field); \
+               LIST_INIT(head2);                                             \
+       }                                                                     \
+} while (0)
+
 #define        LIST_EMPTY(head)        ((head)->lh_first == NULL)
 
 #define        LIST_FIRST(head)        ((head)->lh_first)

Modified: stable/10/sys/ufs/ffs/ffs_softdep.c
==============================================================================
--- stable/10/sys/ufs/ffs/ffs_softdep.c Mon Oct 17 21:44:41 2016        
(r307533)
+++ stable/10/sys/ufs/ffs/ffs_softdep.c Mon Oct 17 21:49:54 2016        
(r307534)
@@ -751,16 +751,16 @@ static    int flush_newblk_dep(struct vnode
 static int flush_inodedep_deps(struct vnode *, struct mount *, ino_t);
 static int flush_deplist(struct allocdirectlst *, int, int *);
 static int sync_cgs(struct mount *, int);
-static int handle_written_filepage(struct pagedep *, struct buf *);
+static int handle_written_filepage(struct pagedep *, struct buf *, int);
 static int handle_written_sbdep(struct sbdep *, struct buf *);
 static void initiate_write_sbdep(struct sbdep *);
 static void diradd_inode_written(struct diradd *, struct inodedep *);
 static int handle_written_indirdep(struct indirdep *, struct buf *,
-           struct buf**);
-static int handle_written_inodeblock(struct inodedep *, struct buf *);
+           struct buf**, int);
+static int handle_written_inodeblock(struct inodedep *, struct buf *, int);
 static int jnewblk_rollforward(struct jnewblk *, struct fs *, struct cg *,
            uint8_t *);
-static int handle_written_bmsafemap(struct bmsafemap *, struct buf *);
+static int handle_written_bmsafemap(struct bmsafemap *, struct buf *, int);
 static void handle_written_jaddref(struct jaddref *);
 static void handle_written_jremref(struct jremref *);
 static void handle_written_jseg(struct jseg *, struct buf *);
@@ -10868,6 +10868,10 @@ initiate_write_bmsafemap(bmsafemap, bp)
        struct fs *fs;
        ino_t ino;
 
+       /*
+        * If this is a background write, we did this at the time that
+        * the copy was made, so do not need to do it again.
+        */
        if (bmsafemap->sm_state & IOSTARTED)
                return;
        bmsafemap->sm_state |= IOSTARTED;
@@ -10941,10 +10945,39 @@ softdep_disk_write_complete(bp)
 
        /*
         * If an error occurred while doing the write, then the data
-        * has not hit the disk and the dependencies cannot be unrolled.
+        * has not hit the disk and the dependencies cannot be processed.
+        * But we do have to go through and roll forward any dependencies
+        * that were rolled back before the disk write.
         */
-       if ((bp->b_ioflags & BIO_ERROR) != 0 && (bp->b_flags & B_INVAL) == 0)
+       if ((bp->b_ioflags & BIO_ERROR) != 0 && (bp->b_flags & B_INVAL) == 0) {
+               LIST_FOREACH(wk, &bp->b_dep, wk_list) {
+                       switch (wk->wk_type) {
+
+                       case D_PAGEDEP:
+                               handle_written_filepage(WK_PAGEDEP(wk), bp, 0);
+                               continue;
+
+                       case D_INODEDEP:
+                               handle_written_inodeblock(WK_INODEDEP(wk),
+                                   bp, 0);
+                               continue;
+
+                       case D_BMSAFEMAP:
+                               handle_written_bmsafemap(WK_BMSAFEMAP(wk),
+                                   bp, 0);
+                               continue;
+
+                       case D_INDIRDEP:
+                               handle_written_indirdep(WK_INDIRDEP(wk),
+                                   bp, &sbp, 0);
+                               continue;
+                       default:
+                               /* nothing to roll forward */
+                               continue;
+                       }
+               }
                return;
+       }
        if ((wk = LIST_FIRST(&bp->b_dep)) == NULL)
                return;
        ump = VFSTOUFS(wk->wk_mp);
@@ -10964,17 +10997,20 @@ softdep_disk_write_complete(bp)
                switch (wk->wk_type) {
 
                case D_PAGEDEP:
-                       if (handle_written_filepage(WK_PAGEDEP(wk), bp))
+                       if (handle_written_filepage(WK_PAGEDEP(wk), bp,
+                           WRITESUCCEEDED))
                                WORKLIST_INSERT(&reattach, wk);
                        continue;
 
                case D_INODEDEP:
-                       if (handle_written_inodeblock(WK_INODEDEP(wk), bp))
+                       if (handle_written_inodeblock(WK_INODEDEP(wk), bp,
+                           WRITESUCCEEDED))
                                WORKLIST_INSERT(&reattach, wk);
                        continue;
 
                case D_BMSAFEMAP:
-                       if (handle_written_bmsafemap(WK_BMSAFEMAP(wk), bp))
+                       if (handle_written_bmsafemap(WK_BMSAFEMAP(wk), bp,
+                           WRITESUCCEEDED))
                                WORKLIST_INSERT(&reattach, wk);
                        continue;
 
@@ -10993,7 +11029,8 @@ softdep_disk_write_complete(bp)
                        continue;
 
                case D_INDIRDEP:
-                       if (handle_written_indirdep(WK_INDIRDEP(wk), bp, &sbp))
+                       if (handle_written_indirdep(WK_INDIRDEP(wk), bp, &sbp,
+                           WRITESUCCEEDED))
                                WORKLIST_INSERT(&reattach, wk);
                        continue;
 
@@ -11293,12 +11330,17 @@ handle_bufwait(inodedep, refhd)
  * Called from within softdep_disk_write_complete above to restore
  * in-memory inode block contents to their most up-to-date state. Note
  * that this routine is always called from interrupt level with further
- * splbio interrupts blocked.
+ * interrupts from this device blocked.
+ *
+ * If the write did not succeed, we will do all the roll-forward
+ * operations, but we will not take the actions that will allow its
+ * dependencies to be processed.
  */
 static int 
-handle_written_inodeblock(inodedep, bp)
+handle_written_inodeblock(inodedep, bp, flags)
        struct inodedep *inodedep;
        struct buf *bp;         /* buffer containing the inode block */
+       int flags;
 {
        struct freefile *freefile;
        struct allocdirect *adp, *nextadp;
@@ -11328,7 +11370,8 @@ handle_written_inodeblock(inodedep, bp)
        /*
         * Leave this inodeblock dirty until it's in the list.
         */
-       if ((inodedep->id_state & (UNLINKED | UNLINKONLIST)) == UNLINKED) {
+       if ((inodedep->id_state & (UNLINKED | UNLINKONLIST)) == UNLINKED &&
+           (flags & WRITESUCCEEDED)) {
                struct inodedep *inon;
 
                inon = TAILQ_NEXT(inodedep, id_unlinked);
@@ -11367,7 +11410,8 @@ handle_written_inodeblock(inodedep, bp)
                        goto bufwait;
                return (1);
        }
-       inodedep->id_state |= COMPLETE;
+       if (flags & WRITESUCCEEDED)
+               inodedep->id_state |= COMPLETE;
        /*
         * Roll forward anything that had to be rolled back before 
         * the inode could be updated.
@@ -11482,6 +11526,13 @@ handle_written_inodeblock(inodedep, bp)
                bdirty(bp);
 bufwait:
        /*
+        * If the write did not succeed, we have done all the roll-forward
+        * operations, but we cannot take the actions that will allow its
+        * dependencies to be processed.
+        */
+       if ((flags & WRITESUCCEEDED) == 0)
+               return (hadchanges);
+       /*
         * Process any allocdirects that completed during the update.
         */
        if ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != NULL)
@@ -11538,11 +11589,20 @@ bufwait:
        return (hadchanges);
 }
 
+/*
+ * Perform needed roll-forwards and kick off any dependencies that
+ * can now be processed.
+ *
+ * If the write did not succeed, we will do all the roll-forward
+ * operations, but we will not take the actions that will allow its
+ * dependencies to be processed.
+ */
 static int
-handle_written_indirdep(indirdep, bp, bpp)
+handle_written_indirdep(indirdep, bp, bpp, flags)
        struct indirdep *indirdep;
        struct buf *bp;
        struct buf **bpp;
+       int flags;
 {
        struct allocindir *aip;
        struct buf *sbp;
@@ -11567,6 +11627,16 @@ handle_written_indirdep(indirdep, bp, bp
        indirdep->ir_state &= ~(UNDONE | IOSTARTED);
        indirdep->ir_state |= ATTACHED;
        /*
+        * If the write did not succeed, we have done all the roll-forward
+        * operations, but we cannot take the actions that will allow its
+        * dependencies to be processed.
+        */
+       if ((flags & WRITESUCCEEDED) == 0) {
+               stat_indir_blk_ptrs++;
+               bdirty(bp);
+               return (1);
+       }
+       /*
         * Move allocindirs with written pointers to the completehd if
         * the indirdep's pointer is not yet written.  Otherwise
         * free them here.
@@ -11720,11 +11790,16 @@ jnewblk_rollforward(jnewblk, fs, cgp, bl
  * Complete a write to a bmsafemap structure.  Roll forward any bitmap
  * changes if it's not a background write.  Set all written dependencies 
  * to DEPCOMPLETE and free the structure if possible.
+ *
+ * If the write did not succeed, we will do all the roll-forward
+ * operations, but we will not take the actions that will allow its
+ * dependencies to be processed.
  */
 static int
-handle_written_bmsafemap(bmsafemap, bp)
+handle_written_bmsafemap(bmsafemap, bp, flags)
        struct bmsafemap *bmsafemap;
        struct buf *bp;
+       int flags;
 {
        struct newblk *newblk;
        struct inodedep *inodedep;
@@ -11740,15 +11815,20 @@ handle_written_bmsafemap(bmsafemap, bp)
        int chgs;
 
        if ((bmsafemap->sm_state & IOSTARTED) == 0)
-               panic("initiate_write_bmsafemap: Not started\n");
+               panic("handle_written_bmsafemap: Not started\n");
        ump = VFSTOUFS(bmsafemap->sm_list.wk_mp);
        chgs = 0;
        bmsafemap->sm_state &= ~IOSTARTED;
        foreground = (bp->b_xflags & BX_BKGRDMARKER) == 0;
        /*
-        * Release journal work that was waiting on the write.
+        * If write was successful, release journal work that was waiting
+        * on the write. Otherwise move the work back.
         */
-       handle_jwork(&bmsafemap->sm_freewr);
+       if (flags & WRITESUCCEEDED)
+               handle_jwork(&bmsafemap->sm_freewr);
+       else
+               LIST_CONCAT(&bmsafemap->sm_freehd, &bmsafemap->sm_freewr,
+                   worklist, wk_list);
 
        /*
         * Restore unwritten inode allocation pending jaddref writes.
@@ -11798,6 +11878,20 @@ handle_written_bmsafemap(bmsafemap, bp)
                        free_jnewblk(jnewblk);
                }
        }
+       /*
+        * If the write did not succeed, we have done all the roll-forward
+        * operations, but we cannot take the actions that will allow its
+        * dependencies to be processed.
+        */
+       if ((flags & WRITESUCCEEDED) == 0) {
+               LIST_CONCAT(&bmsafemap->sm_newblkhd, &bmsafemap->sm_newblkwr,
+                   newblk, nb_deps);
+               LIST_CONCAT(&bmsafemap->sm_freehd, &bmsafemap->sm_freewr,
+                   worklist, wk_list);
+               if (foreground)
+                       bdirty(bp);
+               return (1);
+       }
        while ((newblk = LIST_FIRST(&bmsafemap->sm_newblkwr))) {
                newblk->nb_state |= DEPCOMPLETE;
                newblk->nb_state &= ~ONDEPLIST;
@@ -11901,12 +11995,17 @@ free_pagedep(pagedep)
  * A write operation was just completed. Removed inodes can
  * now be freed and associated block pointers may be committed.
  * Note that this routine is always called from interrupt level
- * with further splbio interrupts blocked.
+ * with further interrupts from this device blocked.
+ *
+ * If the write did not succeed, we will do all the roll-forward
+ * operations, but we will not take the actions that will allow its
+ * dependencies to be processed.
  */
 static int 
-handle_written_filepage(pagedep, bp)
+handle_written_filepage(pagedep, bp, flags)
        struct pagedep *pagedep;
        struct buf *bp;         /* buffer containing the written page */
+       int flags;
 {
        struct dirrem *dirrem;
        struct diradd *dap, *nextdap;
@@ -11916,6 +12015,8 @@ handle_written_filepage(pagedep, bp)
        if ((pagedep->pd_state & IOSTARTED) == 0)
                panic("handle_written_filepage: not started");
        pagedep->pd_state &= ~IOSTARTED;
+       if ((flags & WRITESUCCEEDED) == 0)
+               goto rollforward;
        /*
         * Process any directory removals that have been committed.
         */
@@ -11935,6 +12036,7 @@ handle_written_filepage(pagedep, bp)
        if ((pagedep->pd_state & NEWBLOCK) == 0)
                while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)) != NULL)
                        free_diradd(dap, NULL);
+rollforward:
        /*
         * Uncommitted directory entries must be restored.
         */
@@ -11967,7 +12069,7 @@ handle_written_filepage(pagedep, bp)
         * marked dirty so that its will eventually get written back in
         * its correct form.
         */
-       if (chgs) {
+       if (chgs || (flags & WRITESUCCEEDED) == 0) {
                if ((bp->b_flags & B_DELWRI) == 0)
                        stat_dir_entry++;
                bdirty(bp);

Modified: stable/10/sys/ufs/ffs/softdep.h
==============================================================================
--- stable/10/sys/ufs/ffs/softdep.h     Mon Oct 17 21:44:41 2016        
(r307533)
+++ stable/10/sys/ufs/ffs/softdep.h     Mon Oct 17 21:49:54 2016        
(r307534)
@@ -140,6 +140,7 @@
 #define        UNLINKPREV      0x100000 /* inodedep is pointed at in the 
unlink list */
 #define        UNLINKONLIST    0x200000 /* inodedep is in the unlinked list on 
disk */
 #define        UNLINKLINKS     (UNLINKNEXT | UNLINKPREV)
+#define        WRITESUCCEEDED  0x400000 /* the disk write completed 
successfully */
 
 #define        ALLCOMPLETE     (ATTACHED | COMPLETE | DEPCOMPLETE)
 
_______________________________________________
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to