Author: delphij
Date: Wed Jan 20 01:13:52 2010
New Revision: 202669
URL: http://svn.freebsd.org/changeset/base/202669

Log:
  MFC r201689:
  
  Instead of assuming all vdevs are healthy, check the newest vdev label
  for each vdev's status.  Booting from a degraded vdev should now be
  more robust.
  
  Submitted by: Matt Reimer <mattjreimer at gmail.com>
  Sponsored by: VPOP Technologies, Inc.

Modified:
  stable/8/sys/boot/zfs/zfsimpl.c
  stable/8/sys/cddl/boot/zfs/zfsimpl.h
Directory Properties:
  stable/8/sys/   (props changed)
  stable/8/sys/amd64/include/xen/   (props changed)
  stable/8/sys/cddl/contrib/opensolaris/   (props changed)
  stable/8/sys/contrib/dev/acpica/   (props changed)
  stable/8/sys/contrib/pf/   (props changed)
  stable/8/sys/dev/xen/xenpci/   (props changed)

Modified: stable/8/sys/boot/zfs/zfsimpl.c
==============================================================================
--- stable/8/sys/boot/zfs/zfsimpl.c     Wed Jan 20 01:07:38 2010        (r202668)
+++ stable/8/sys/boot/zfs/zfsimpl.c     Wed Jan 20 01:13:52 2010        (r202669)
@@ -404,7 +404,7 @@ vdev_create(uint64_t guid, vdev_read_t *
 }
 
 static int
-vdev_init_from_nvlist(const unsigned char *nvlist, vdev_t **vdevp)
+vdev_init_from_nvlist(const unsigned char *nvlist, vdev_t **vdevp, int is_newer)
 {
        int rc;
        uint64_t guid, id, ashift, nparity;
@@ -412,7 +412,8 @@ vdev_init_from_nvlist(const unsigned cha
        const char *path;
        vdev_t *vdev, *kid;
        const unsigned char *kids;
-       int nkids, i;
+       int nkids, i, is_new;
+       uint64_t is_offline, is_faulted, is_degraded, is_removed;
 
        if (nvlist_find(nvlist, ZPOOL_CONFIG_GUID,
                        DATA_TYPE_UINT64, 0, &guid)
@@ -424,17 +425,6 @@ vdev_init_from_nvlist(const unsigned cha
                return (ENOENT);
        }
 
-       /*
-        * Assume that if we've seen this vdev tree before, this one
-        * will be identical.
-        */
-       vdev = vdev_find(guid);
-       if (vdev) {
-               if (vdevp)
-                       *vdevp = vdev;
-               return (0);
-       }
-
        if (strcmp(type, VDEV_TYPE_MIRROR)
            && strcmp(type, VDEV_TYPE_DISK)
            && strcmp(type, VDEV_TYPE_RAIDZ)) {
@@ -442,6 +432,21 @@ vdev_init_from_nvlist(const unsigned cha
                return (EIO);
        }
 
+       is_offline = is_removed = is_faulted = is_degraded = 0;
+
+       nvlist_find(nvlist, ZPOOL_CONFIG_OFFLINE, DATA_TYPE_UINT64, 0,
+                       &is_offline);
+       nvlist_find(nvlist, ZPOOL_CONFIG_REMOVED, DATA_TYPE_UINT64, 0,
+                       &is_removed);
+       nvlist_find(nvlist, ZPOOL_CONFIG_FAULTED, DATA_TYPE_UINT64, 0,
+                       &is_faulted);
+       nvlist_find(nvlist, ZPOOL_CONFIG_DEGRADED, DATA_TYPE_UINT64, 0,
+                       &is_degraded);
+
+       vdev = vdev_find(guid);
+       if (!vdev) {
+               is_new = 1;
+
        if (!strcmp(type, VDEV_TYPE_MIRROR))
                vdev = vdev_create(guid, vdev_mirror_read);
        else if (!strcmp(type, VDEV_TYPE_RAIDZ))
@@ -480,6 +485,39 @@ vdev_init_from_nvlist(const unsigned cha
                        vdev->v_name = strdup(type);
                }
        }
+
+               if (is_offline)
+                       vdev->v_state = VDEV_STATE_OFFLINE;
+               else if (is_removed)
+                       vdev->v_state = VDEV_STATE_REMOVED;
+               else if (is_faulted)
+                       vdev->v_state = VDEV_STATE_FAULTED;
+               else if (is_degraded)
+                       vdev->v_state = VDEV_STATE_DEGRADED;
+               else
+                       vdev->v_state = VDEV_STATE_HEALTHY;
+       } else {
+               is_new = 0;
+
+               if (is_newer) {
+                       /*
+                        * We've already seen this vdev, but from an older
+                        * vdev label, so let's refresh its state from the
+                        * newer label.
+                        */
+                       if (is_offline)
+                               vdev->v_state = VDEV_STATE_OFFLINE;
+                       else if (is_removed)
+                               vdev->v_state = VDEV_STATE_REMOVED;
+                       else if (is_faulted)
+                               vdev->v_state = VDEV_STATE_FAULTED;
+                       else if (is_degraded)
+                               vdev->v_state = VDEV_STATE_DEGRADED;
+                       else
+                               vdev->v_state = VDEV_STATE_HEALTHY;
+               }
+       }
+
        rc = nvlist_find(nvlist, ZPOOL_CONFIG_CHILDREN,
                         DATA_TYPE_NVLIST_ARRAY, &nkids, &kids);
        /*
@@ -488,10 +526,12 @@ vdev_init_from_nvlist(const unsigned cha
        if (rc == 0) {
                vdev->v_nchildren = nkids;
                for (i = 0; i < nkids; i++) {
-                       rc = vdev_init_from_nvlist(kids, &kid);
+                       rc = vdev_init_from_nvlist(kids, &kid, is_newer);
                        if (rc)
                                return (rc);
-                       STAILQ_INSERT_TAIL(&vdev->v_children, kid, v_childlink);
+                       if (is_new)
+                               STAILQ_INSERT_TAIL(&vdev->v_children, kid,
+                                                  v_childlink);
                        kids = nvlist_next(kids);
                }
        } else {
@@ -593,7 +633,9 @@ state_name(vdev_state_t state)
                "UNKNOWN",
                "CLOSED",
                "OFFLINE",
+               "REMOVED",
                "CANT_OPEN",
+               "FAULTED",
                "DEGRADED",
                "ONLINE"
        };
@@ -711,7 +753,7 @@ vdev_probe(vdev_phys_read_t *read, void 
        uint64_t pool_txg, pool_guid;
        const char *pool_name;
        const unsigned char *vdevs;
-       int i, rc;
+       int i, rc, is_newer;
        char upbuf[1024];
        const struct uberblock *up;
 
@@ -793,12 +835,15 @@ vdev_probe(vdev_phys_read_t *read, void 
                spa = spa_create(pool_guid);
                spa->spa_name = strdup(pool_name);
        }
-       if (pool_txg > spa->spa_txg)
+       if (pool_txg > spa->spa_txg) {
                spa->spa_txg = pool_txg;
+               is_newer = 1;
+       } else
+               is_newer = 0;
 
        /*
         * Get the vdev tree and create our in-core copy of it.
-        * If we already have a healthy vdev with this guid, this must
+        * If we already have a vdev with this guid, this must
         * be some kind of alias (overlapping slices, dangerously dedicated
         * disks etc).
         */
@@ -808,16 +853,16 @@ vdev_probe(vdev_phys_read_t *read, void 
                return (EIO);
        }
        vdev = vdev_find(guid);
-       if (vdev && vdev->v_state == VDEV_STATE_HEALTHY) {
+       if (vdev && vdev->v_phys_read)  /* Has this vdev already been inited? */
                return (EIO);
-       }
 
        if (nvlist_find(nvlist,
                        ZPOOL_CONFIG_VDEV_TREE,
                        DATA_TYPE_NVLIST, 0, &vdevs)) {
                return (EIO);
        }
-       rc = vdev_init_from_nvlist(vdevs, &top_vdev);
+
+       rc = vdev_init_from_nvlist(vdevs, &top_vdev, is_newer);
        if (rc)
                return (rc);
 
@@ -838,7 +883,6 @@ vdev_probe(vdev_phys_read_t *read, void 
        if (vdev) {
                vdev->v_phys_read = read;
                vdev->v_read_priv = read_priv;
-               vdev->v_state = VDEV_STATE_HEALTHY;
        } else {
                printf("ZFS: inconsistent nvlist contents\n");
                return (EIO);

Modified: stable/8/sys/cddl/boot/zfs/zfsimpl.h
==============================================================================
--- stable/8/sys/cddl/boot/zfs/zfsimpl.h        Wed Jan 20 01:07:38 2010        (r202668)
+++ stable/8/sys/cddl/boot/zfs/zfsimpl.h        Wed Jan 20 01:13:52 2010        (r202669)
@@ -548,7 +548,6 @@ typedef enum {
 #define        ZPOOL_CONFIG_DTL                "DTL"
 #define        ZPOOL_CONFIG_STATS              "stats"
 #define        ZPOOL_CONFIG_WHOLE_DISK         "whole_disk"
-#define        ZPOOL_CONFIG_OFFLINE            "offline"
 #define        ZPOOL_CONFIG_ERRCOUNT           "error_count"
 #define        ZPOOL_CONFIG_NOT_PRESENT        "not_present"
 #define        ZPOOL_CONFIG_SPARES             "spares"
@@ -558,6 +557,16 @@ typedef enum {
 #define        ZPOOL_CONFIG_HOSTNAME           "hostname"
 #define        ZPOOL_CONFIG_TIMESTAMP          "timestamp" /* not stored on disk */
 
+/*
+ * The persistent vdev state is stored as separate values rather than a single
+ * 'vdev_state' entry.  This is because a device can be in multiple states, such
+ * as offline and degraded.
+ */
+#define        ZPOOL_CONFIG_OFFLINE            "offline"
+#define        ZPOOL_CONFIG_FAULTED            "faulted"
+#define        ZPOOL_CONFIG_DEGRADED           "degraded"
+#define        ZPOOL_CONFIG_REMOVED            "removed"
+
 #define        VDEV_TYPE_ROOT                  "root"
 #define        VDEV_TYPE_MIRROR                "mirror"
 #define        VDEV_TYPE_REPLACING             "replacing"
@@ -590,7 +599,9 @@ typedef enum vdev_state {
        VDEV_STATE_UNKNOWN = 0, /* Uninitialized vdev                   */
        VDEV_STATE_CLOSED,      /* Not currently open                   */
        VDEV_STATE_OFFLINE,     /* Not allowed to open                  */
+       VDEV_STATE_REMOVED,     /* Explicitly removed from system       */
        VDEV_STATE_CANT_OPEN,   /* Tried to open, but failed            */
+       VDEV_STATE_FAULTED,     /* External request to fault device     */
        VDEV_STATE_DEGRADED,    /* Replicated vdev with unhealthy kids  */
        VDEV_STATE_HEALTHY      /* Presumed good                        */
 } vdev_state_t;
_______________________________________________
svn-src-all@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscribe@freebsd.org"

Reply via email to