Author: mm
Date: Mon Sep 27 09:42:31 2010
New Revision: 213198
URL: http://svn.freebsd.org/changeset/base/213198

Log:
  Properly handle IO with B_FAILFAST
  Retry IO once with ZIO_FLAG_TRYHARD before declaring a pool faulted
  
  OpenSolaris revision and Bug IDs:
  
  9725:0bf7402e8022
  6843014 ZFS B_FAILFAST handling is broken
  
  Approved by:  delphij (mentor)
  Obtained from:        OpenSolaris (Bug ID 6843014)
  MFC after:    3 weeks

Modified:
  head/cddl/contrib/opensolaris/cmd/zinject/zinject.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fm.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_inject.c

Modified: head/cddl/contrib/opensolaris/cmd/zinject/zinject.c
==============================================================================
--- head/cddl/contrib/opensolaris/cmd/zinject/zinject.c Mon Sep 27 09:05:51 2010        (r213197)
+++ head/cddl/contrib/opensolaris/cmd/zinject/zinject.c Mon Sep 27 09:42:31 2010        (r213198)
@@ -19,12 +19,10 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
-#pragma ident  "%Z%%M% %I%     %E% SMI"
-
 /*
  * ZFS Fault Injector
  *
@@ -227,7 +225,7 @@ usage(void)
            "\t\tClear the particular record (if given a numeric ID), or\n"
            "\t\tall records if 'all' is specificed.\n"
            "\n"
-           "\tzinject -d device [-e errno] [-L <nvlist|uber>] pool\n"
+           "\tzinject -d device [-e errno] [-L <nvlist|uber>] [-F] pool\n"
            "\t\tInject a fault into a particular device or the device's\n"
            "\t\tlabel.  Label injection can either be 'nvlist' or 'uber'.\n"
            "\t\t'errno' can either be 'nxio' (the default) or 'io'.\n"
@@ -519,7 +517,7 @@ main(int argc, char **argv)
                return (0);
        }
 
-       while ((c = getopt(argc, argv, ":ab:d:f:qhc:t:l:mr:e:uL:")) != -1) {
+       while ((c = getopt(argc, argv, ":ab:d:f:Fqhc:t:l:mr:e:uL:")) != -1) {
                switch (c) {
                case 'a':
                        flags |= ZINJECT_FLUSH_ARC;
@@ -556,6 +554,9 @@ main(int argc, char **argv)
                                return (1);
                        }
                        break;
+               case 'F':
+                       record.zi_failfast = B_TRUE;
+                       break;
                case 'h':
                        usage();
                        return (0);

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c   Mon Sep 27 09:05:51 2010        (r213197)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c   Mon Sep 27 09:42:31 2010        (r213198)
@@ -4252,10 +4252,16 @@ spa_sync(spa_t *spa, uint64_t txg)
                                if (svdcount == SPA_DVAS_PER_BP)
                                        break;
                        }
-                       error = vdev_config_sync(svd, svdcount, txg);
+                       error = vdev_config_sync(svd, svdcount, txg, B_FALSE);
+                       if (error != 0)
+                               error = vdev_config_sync(svd, svdcount, txg,
+                                   B_TRUE);
                } else {
                        error = vdev_config_sync(rvd->vdev_child,
-                           rvd->vdev_children, txg);
+                           rvd->vdev_children, txg, B_FALSE);
+                       if (error != 0)
+                               error = vdev_config_sync(rvd->vdev_child,
+                                   rvd->vdev_children, txg, B_TRUE);
                }
 
                spa_config_exit(spa, SCL_STATE, FTAG);

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h      Mon Sep 27 09:05:51 2010        (r213197)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h      Mon Sep 27 09:42:31 2010        (r213198)
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -113,7 +113,8 @@ extern void vdev_queue_io_done(zio_t *zi
 
 extern void vdev_config_dirty(vdev_t *vd);
 extern void vdev_config_clean(vdev_t *vd);
-extern int vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg);
+extern int vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg,
+    boolean_t);
 
 extern void vdev_state_dirty(vdev_t *vd);
 extern void vdev_state_clean(vdev_t *vd);

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h Mon Sep 27 09:05:51 2010        (r213197)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h Mon Sep 27 09:42:31 2010        (r213198)
@@ -118,7 +118,7 @@ typedef struct zinject_record {
        uint32_t        zi_error;
        uint64_t        zi_type;
        uint32_t        zi_freq;
-       uint32_t        zi_pad; /* pad out to 64 bit alignment */
+       uint32_t        zi_failfast;
 } zinject_record_t;
 
 #define        ZINJECT_NULL            0x1

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h       Mon Sep 27 09:05:51 2010        (r213197)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h       Mon Sep 27 09:42:31 2010        (r213198)
@@ -117,31 +117,33 @@ enum zio_compress {
 #define        ZIO_PRIORITY_SCRUB              (zio_priority_table[10])
 #define        ZIO_PRIORITY_TABLE_SIZE         11
 
-#define        ZIO_FLAG_MUSTSUCCEED            0x00000
-#define        ZIO_FLAG_CANFAIL                0x00001
-#define        ZIO_FLAG_SPECULATIVE            0x00002
-#define        ZIO_FLAG_CONFIG_WRITER          0x00004
-#define        ZIO_FLAG_DONT_RETRY             0x00008
-
-#define        ZIO_FLAG_DONT_CACHE             0x00010
-#define        ZIO_FLAG_DONT_QUEUE             0x00020
-#define        ZIO_FLAG_DONT_AGGREGATE         0x00040
-#define        ZIO_FLAG_DONT_PROPAGATE         0x00080
-
-#define        ZIO_FLAG_IO_BYPASS              0x00100
-#define        ZIO_FLAG_IO_REPAIR              0x00200
-#define        ZIO_FLAG_IO_RETRY               0x00400
-#define        ZIO_FLAG_IO_REWRITE             0x00800
-
-#define        ZIO_FLAG_SELF_HEAL              0x01000
-#define        ZIO_FLAG_RESILVER               0x02000
-#define        ZIO_FLAG_SCRUB                  0x04000
-#define        ZIO_FLAG_SCRUB_THREAD           0x08000
-
-#define        ZIO_FLAG_PROBE                  0x10000
-#define        ZIO_FLAG_GANG_CHILD             0x20000
-#define        ZIO_FLAG_RAW                    0x40000
-#define        ZIO_FLAG_GODFATHER              0x80000
+#define        ZIO_FLAG_MUSTSUCCEED            0x000000
+#define        ZIO_FLAG_CANFAIL                0x000001
+#define        ZIO_FLAG_SPECULATIVE            0x000002
+#define        ZIO_FLAG_CONFIG_WRITER          0x000004
+#define        ZIO_FLAG_DONT_RETRY             0x000008
+
+#define        ZIO_FLAG_DONT_CACHE             0x000010
+#define        ZIO_FLAG_DONT_QUEUE             0x000020
+#define        ZIO_FLAG_DONT_AGGREGATE         0x000040
+#define        ZIO_FLAG_DONT_PROPAGATE         0x000080
+
+#define        ZIO_FLAG_IO_BYPASS              0x000100
+#define        ZIO_FLAG_IO_REPAIR              0x000200
+#define        ZIO_FLAG_IO_RETRY               0x000400
+#define        ZIO_FLAG_IO_REWRITE             0x000800
+
+#define        ZIO_FLAG_SELF_HEAL              0x001000
+#define        ZIO_FLAG_RESILVER               0x002000
+#define        ZIO_FLAG_SCRUB                  0x004000
+#define        ZIO_FLAG_SCRUB_THREAD           0x008000
+
+#define        ZIO_FLAG_PROBE                  0x010000
+#define        ZIO_FLAG_GANG_CHILD             0x020000
+#define        ZIO_FLAG_RAW                    0x040000
+#define        ZIO_FLAG_GODFATHER              0x080000
+
+#define        ZIO_FLAG_TRYHARD                0x100000
 
 #define        ZIO_FLAG_GANG_INHERIT           \
        (ZIO_FLAG_CANFAIL |             \
@@ -159,7 +161,8 @@ enum zio_compress {
        (ZIO_FLAG_GANG_INHERIT |        \
        ZIO_FLAG_IO_REPAIR |            \
        ZIO_FLAG_IO_RETRY |             \
-       ZIO_FLAG_PROBE)
+       ZIO_FLAG_PROBE |                \
+       ZIO_FLAG_TRYHARD)
 
 #define        ZIO_FLAG_AGG_INHERIT            \
        (ZIO_FLAG_DONT_AGGREGATE |      \
@@ -440,7 +443,7 @@ extern int zio_inject_list_next(int *id,
     struct zinject_record *record);
 extern int zio_clear_fault(int id);
 extern int zio_handle_fault_injection(zio_t *zio, int error);
-extern int zio_handle_device_injection(vdev_t *vd, int error);
+extern int zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error);
 extern int zio_handle_label_injection(zio_t *zio, int error);
 
 #ifdef __cplusplus

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c  Mon Sep 27 09:05:51 2010        (r213197)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c  Mon Sep 27 09:42:31 2010        (r213198)
@@ -928,7 +928,7 @@ vdev_probe(vdev_t *vd, zio_t *zio)
 
                vps->vps_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_PROBE |
                    ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_AGGREGATE |
-                   ZIO_FLAG_DONT_RETRY;
+                   ZIO_FLAG_TRYHARD;
 
                if (spa_config_held(spa, SCL_ZIO, RW_WRITER)) {
                        /*
@@ -1025,7 +1025,7 @@ vdev_open(vdev_t *vd)
        error = vd->vdev_ops->vdev_op_open(vd, &osize, &ashift);
 
        if (zio_injection_enabled && error == 0)
-               error = zio_handle_device_injection(vd, ENXIO);
+               error = zio_handle_device_injection(vd, NULL, ENXIO);
 
        if (error) {
                if (vd->vdev_removed &&
@@ -2207,6 +2207,16 @@ vdev_stat_update(zio_t *zio, uint64_t ps
        if (flags & ZIO_FLAG_SPECULATIVE)
                return;
 
+       /*
+        * If this is an I/O error that is going to be retried, then ignore the
+        * error.  Otherwise, the user may interpret B_FAILFAST I/O errors as
+        * hard errors, when in reality they can happen for any number of
+        * innocuous reasons (bus resets, MPxIO link failure, etc).
+        */
+       if (zio->io_error == EIO &&
+           !(zio->io_flags & ZIO_FLAG_IO_RETRY))
+               return;
+
        mutex_enter(&vd->vdev_stat_lock);
        if (type == ZIO_TYPE_READ && !vdev_is_dead(vd)) {
                if (zio->io_error == ECKSUM)

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c     Mon Sep 27 09:05:51 2010        (r213197)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c     Mon Sep 27 09:42:31 2010        (r213198)
@@ -401,8 +401,9 @@ vdev_disk_io_start(zio_t *zio)
 
        bioinit(bp);
        bp->b_flags = B_BUSY | B_NOCACHE |
-           (zio->io_type == ZIO_TYPE_READ ? B_READ : B_WRITE) |
-           ((zio->io_flags & ZIO_FLAG_IO_RETRY) ? 0 : B_FAILFAST);
+           (zio->io_type == ZIO_TYPE_READ ? B_READ : B_WRITE);
+       if (!(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)))
+               bp->b_flags |= B_FAILFAST;
        bp->b_bcount = zio->io_size;
        bp->b_un.b_addr = zio->io_data;
        bp->b_lblkno = lbtodb(zio->io_offset);

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c    Mon Sep 27 09:05:51 2010        (r213197)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c    Mon Sep 27 09:42:31 2010        (r213198)
@@ -339,8 +339,8 @@ vdev_label_read_config(vdev_t *vd)
        nvlist_t *config = NULL;
        vdev_phys_t *vp;
        zio_t *zio;
-       int flags =
-           ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE;
+       int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL |
+           ZIO_FLAG_SPECULATIVE;
 
        ASSERT(spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL);
 
@@ -349,6 +349,7 @@ vdev_label_read_config(vdev_t *vd)
 
        vp = zio_buf_alloc(sizeof (vdev_phys_t));
 
+retry:
        for (int l = 0; l < VDEV_LABELS; l++) {
 
                zio = zio_root(spa, NULL, NULL, flags);
@@ -368,6 +369,11 @@ vdev_label_read_config(vdev_t *vd)
                }
        }
 
+       if (config == NULL && !(flags & ZIO_FLAG_TRYHARD)) {
+               flags |= ZIO_FLAG_TRYHARD;
+               goto retry;
+       }
+
        zio_buf_free(vp, sizeof (vdev_phys_t));
 
        return (config);
@@ -648,6 +654,7 @@ vdev_label_init(vdev_t *vd, uint64_t crt
        /*
         * Write everything in parallel.
         */
+retry:
        zio = zio_root(spa, NULL, NULL, flags);
 
        for (int l = 0; l < VDEV_LABELS; l++) {
@@ -674,6 +681,11 @@ vdev_label_init(vdev_t *vd, uint64_t crt
 
        error = zio_wait(zio);
 
+       if (error != 0 && !(flags & ZIO_FLAG_TRYHARD)) {
+               flags |= ZIO_FLAG_TRYHARD;
+               goto retry;
+       }
+
        nvlist_free(label);
        zio_buf_free(pad2, VDEV_PAD_SIZE);
        zio_buf_free(ub, VDEV_UBERBLOCK_SIZE(vd));
@@ -760,8 +772,8 @@ vdev_uberblock_load(zio_t *zio, vdev_t *
 {
        spa_t *spa = vd->vdev_spa;
        vdev_t *rvd = spa->spa_root_vdev;
-       int flags =
-           ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE;
+       int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL |
+           ZIO_FLAG_SPECULATIVE | ZIO_FLAG_TRYHARD;
 
        if (vd == rvd) {
                ASSERT(zio == NULL);
@@ -999,7 +1011,7 @@ vdev_label_sync_list(spa_t *spa, int l, 
  * at any time, you can just call it again, and it will resume its work.
  */
 int
-vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg)
+vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg, boolean_t tryhard)
 {
        spa_t *spa = svd[0]->vdev_spa;
        uberblock_t *ub = &spa->spa_uberblock;
@@ -1008,6 +1020,16 @@ vdev_config_sync(vdev_t **svd, int svdco
        int error;
        int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL;
 
+       /*
+        * Normally, we don't want to try too hard to write every label and
+        * uberblock.  If there is a flaky disk, we don't want the rest of the
+        * sync process to block while we retry.  But if we can't write a
+        * single label out, we should retry with ZIO_FLAG_TRYHARD before
+        * bailing out and declaring the pool faulted.
+        */
+       if (tryhard)
+               flags |= ZIO_FLAG_TRYHARD;
+
        ASSERT(ub->ub_txg <= txg);
 
        /*

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fm.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fm.c        Mon Sep 27 09:05:51 2010        (r213197)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fm.c        Mon Sep 27 09:42:31 2010        (r213198)
@@ -134,6 +134,15 @@ zfs_ereport_post(const char *subclass, s
                if (zio->io_flags & ZIO_FLAG_SPECULATIVE)
                        return;
 
+               /*
+                * If this I/O is not a retry I/O, don't post an ereport.
+                * Otherwise, we risk making bad diagnoses based on B_FAILFAST
+                * I/Os.
+                */
+               if (zio->io_error == EIO &&
+                   !(zio->io_flags & ZIO_FLAG_IO_RETRY))
+                       return;
+
                if (vd != NULL) {
                        /*
                         * If the vdev has already been marked as failing due

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c   Mon Sep 27 09:05:51 2010        (r213197)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c   Mon Sep 27 09:42:31 2010        (r213198)
@@ -1870,7 +1870,8 @@ zio_vdev_io_done(zio_t *zio)
                        vdev_cache_write(zio);
 
                if (zio_injection_enabled && zio->io_error == 0)
-                       zio->io_error = zio_handle_device_injection(vd, EIO);
+                       zio->io_error = zio_handle_device_injection(vd,
+                           zio, EIO);
 
                if (zio_injection_enabled && zio->io_error == 0)
                        zio->io_error = zio_handle_label_injection(zio, EIO);

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_inject.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_inject.c    Mon Sep 27 09:05:51 2010        (r213197)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_inject.c    Mon Sep 27 09:42:31 2010        (r213198)
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -195,7 +195,7 @@ zio_handle_label_injection(zio_t *zio, i
 
 
 int
-zio_handle_device_injection(vdev_t *vd, int error)
+zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error)
 {
        inject_handler_t *handler;
        int ret = 0;
@@ -210,6 +210,12 @@ zio_handle_device_injection(vdev_t *vd, 
                        continue;
 
                if (vd->vdev_guid == handler->zi_record.zi_guid) {
+                       if (handler->zi_record.zi_failfast &&
+                           (zio == NULL || (zio->io_flags &
+                           (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)))) {
+                               continue;
+                       }
+
                        if (handler->zi_record.zi_error == error) {
                                /*
                                 * For a failed open, pretend like the device
_______________________________________________
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to