Author: asomers
Date: Sat May 28 17:43:40 2016
New Revision: 300906
URL: https://svnweb.freebsd.org/changeset/base/300906

Log:
  zfsd(8), the ZFS fault management daemon
  
  Add zfsd, which deals with hard drive faults in ZFS pools. It manages
  hotspares and replacements in drive slots that publish physical paths.
  
  cddl/usr.sbin/zfsd
        Add zfsd(8) and its unit tests
  
  cddl/usr.sbin/Makefile
        Add zfsd to the build
  
  lib/libdevdctl
        A C++ library that helps devd clients process events
  
  lib/Makefile
  share/mk/bsd.libnames.mk
  share/mk/src.libnames.mk
        Add libdevdctl to the build. It's a private library, unusable by
        out-of-tree software.
  
  etc/defaults/rc.conf
        By default, set zfsd_enable to NO
  
  etc/mtree/BSD.include.dist
        Add a directory for libdevdctl's include files
  
  etc/mtree/BSD.tests.dist
        Add a directory for zfsd's unit tests
  
  etc/mtree/BSD.var.dist
        Add /var/db/zfsd/cases, where zfsd stores case files while it's shut
        down.
  
  etc/rc.d/Makefile
  etc/rc.d/zfsd
        Add zfsd's rc script
  
  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
        Fix the resource.fs.zfs.statechange message. It had a number of
        problems:
  
        It was only being emitted on a transition to the HEALTHY state.
        That made it impossible for zfsd to take actions based on drives
        getting sicker.
  
        It compared the new state to vdev_prevstate, which is the state that
        the vdev had the last time it was opened.  That doesn't make sense,
        because a vdev can change state multiple times without being
        reopened.
  
        vdev_set_state contains logic that will change the device's new
        state based on various conditions.  However, the statechange event
        was being posted _before_ that logic took effect.  Now it's being
        posted after.
  
  Submitted by: gibbs, asomers, mav, allanjude
  Reviewed by:  mav, delphij
  Relnotes:     yes
  Sponsored by: Spectra Logic Corp, iX Systems
  Differential Revision:        https://reviews.freebsd.org/D6564

Added:
  head/cddl/usr.sbin/zfsd/
  head/cddl/usr.sbin/zfsd/Makefile   (contents, props changed)
  head/cddl/usr.sbin/zfsd/Makefile.common   (contents, props changed)
  head/cddl/usr.sbin/zfsd/callout.cc   (contents, props changed)
  head/cddl/usr.sbin/zfsd/callout.h   (contents, props changed)
  head/cddl/usr.sbin/zfsd/case_file.cc   (contents, props changed)
  head/cddl/usr.sbin/zfsd/case_file.h   (contents, props changed)
  head/cddl/usr.sbin/zfsd/tests/
  head/cddl/usr.sbin/zfsd/tests/Makefile   (contents, props changed)
  head/cddl/usr.sbin/zfsd/tests/libmocks.c   (contents, props changed)
  head/cddl/usr.sbin/zfsd/tests/libmocks.h   (contents, props changed)
  head/cddl/usr.sbin/zfsd/tests/zfsd_unittest.cc   (contents, props changed)
  head/cddl/usr.sbin/zfsd/tests/zfsd_unittest.supp   (contents, props changed)
  head/cddl/usr.sbin/zfsd/vdev.cc   (contents, props changed)
  head/cddl/usr.sbin/zfsd/vdev.h   (contents, props changed)
  head/cddl/usr.sbin/zfsd/vdev_iterator.cc   (contents, props changed)
  head/cddl/usr.sbin/zfsd/vdev_iterator.h   (contents, props changed)
  head/cddl/usr.sbin/zfsd/zfsd.8   (contents, props changed)
  head/cddl/usr.sbin/zfsd/zfsd.cc   (contents, props changed)
  head/cddl/usr.sbin/zfsd/zfsd.h   (contents, props changed)
  head/cddl/usr.sbin/zfsd/zfsd_event.cc   (contents, props changed)
  head/cddl/usr.sbin/zfsd/zfsd_event.h   (contents, props changed)
  head/cddl/usr.sbin/zfsd/zfsd_exception.cc   (contents, props changed)
  head/cddl/usr.sbin/zfsd/zfsd_exception.h   (contents, props changed)
  head/cddl/usr.sbin/zfsd/zfsd_main.cc   (contents, props changed)
  head/cddl/usr.sbin/zfsd/zpool_list.cc   (contents, props changed)
  head/cddl/usr.sbin/zfsd/zpool_list.h   (contents, props changed)
  head/etc/rc.d/zfsd   (contents, props changed)
  head/lib/libdevdctl/
  head/lib/libdevdctl/Makefile   (contents, props changed)
  head/lib/libdevdctl/consumer.cc   (contents, props changed)
  head/lib/libdevdctl/consumer.h   (contents, props changed)
  head/lib/libdevdctl/event.cc   (contents, props changed)
  head/lib/libdevdctl/event.h   (contents, props changed)
  head/lib/libdevdctl/event_factory.cc   (contents, props changed)
  head/lib/libdevdctl/event_factory.h   (contents, props changed)
  head/lib/libdevdctl/exception.cc   (contents, props changed)
  head/lib/libdevdctl/exception.h   (contents, props changed)
  head/lib/libdevdctl/guid.cc   (contents, props changed)
  head/lib/libdevdctl/guid.h   (contents, props changed)
  head/lib/libdevdctl/tests/
  head/lib/libdevdctl/tests/Makefile   (contents, props changed)
  head/lib/libdevdctl/tests/libdevdctl_unittest.cc   (contents, props changed)
Modified:
  head/cddl/usr.sbin/Makefile
  head/etc/defaults/rc.conf
  head/etc/mtree/BSD.include.dist
  head/etc/mtree/BSD.tests.dist
  head/etc/mtree/BSD.var.dist
  head/etc/rc.d/Makefile
  head/lib/Makefile
  head/share/mk/bsd.libnames.mk
  head/share/mk/src.libnames.mk
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c

Modified: head/cddl/usr.sbin/Makefile
==============================================================================
--- head/cddl/usr.sbin/Makefile Sat May 28 16:38:09 2016        (r300905)
+++ head/cddl/usr.sbin/Makefile Sat May 28 17:43:40 2016        (r300906)
@@ -7,6 +7,7 @@ SUBDIR= ${_dtrace} \
        ${_plockstat} \
        ${_tests} \
        ${_zdb} \
+       ${_zfsd} \
        ${_zhack}
 
 .if ${MK_TESTS} != "no"
@@ -18,6 +19,9 @@ _tests=       tests
 _zdb=  zdb
 _zhack=        zhack
 .endif
+. if ${MK_CXX} != "no"
+_zfsd= zfsd
+. endif
 .endif
 
 .if ${MACHINE_ARCH} == "amd64" || ${MACHINE_ARCH} == "i386"

Added: head/cddl/usr.sbin/zfsd/Makefile
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/cddl/usr.sbin/zfsd/Makefile    Sat May 28 17:43:40 2016        
(r300906)
@@ -0,0 +1,13 @@
+# $FreeBSD$
+
+SRCDIR=${.CURDIR}/../../..
+.include "Makefile.common"
+
+PROG_CXX=      zfsd
+MAN=           zfsd.8
+
+.include <bsd.prog.mk>
+
+# The unittests require devel/googletest and devel/googlemock from ports.
+# Don't automatically build them.
+SUBDIR=

Added: head/cddl/usr.sbin/zfsd/Makefile.common
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/cddl/usr.sbin/zfsd/Makefile.common     Sat May 28 17:43:40 2016        
(r300906)
@@ -0,0 +1,42 @@
+# $FreeBSD$
+
+SRCS=          callout.cc              \
+               case_file.cc            \
+               zfsd_event.cc           \
+               vdev.cc                 \
+               vdev_iterator.cc        \
+               zfsd.cc                 \
+               zfsd_exception.cc       \
+               zpool_list.cc           \
+               zfsd_main.cc
+
+WARNS?=                3
+
+# Ignore warnings about Solaris specific pragmas.
+IGNORE_PRAGMA=  YES
+
+INCFLAGS+= -I${SRCDIR}/cddl/contrib/opensolaris/lib/libzpool/common
+INCFLAGS+= -I${SRCDIR}/cddl/compat/opensolaris/include
+INCFLAGS+= -I${SRCDIR}/cddl/compat/opensolaris/lib/libumem
+INCFLAGS+= -I${SRCDIR}/sys/cddl/compat/opensolaris
+INCFLAGS+= -I${SRCDIR}/cddl/contrib/opensolaris/head
+INCFLAGS+= -I${SRCDIR}/cddl/contrib/opensolaris/lib/libuutil/common
+INCFLAGS+= -I${SRCDIR}/cddl/contrib/opensolaris/lib/libumem/common
+INCFLAGS+= -I${SRCDIR}/cddl/contrib/opensolaris/lib/libzfs_core/common
+INCFLAGS+= -I${SRCDIR}/cddl/contrib/opensolaris/lib/libzfs/common
+INCFLAGS+= -I${SRCDIR}/cddl/contrib/opensolaris/lib/libnvpair
+INCFLAGS+= -I${SRCDIR}/sys/cddl/contrib/opensolaris/common/zfs
+INCFLAGS+= -I${SRCDIR}/sys/cddl/contrib/opensolaris/uts/common
+INCFLAGS+= -I${SRCDIR}/sys/cddl/contrib/opensolaris/uts/common/fs/zfs
+INCFLAGS+= -I${SRCDIR}/sys/cddl/contrib/opensolaris/uts/common/sys
+
+CFLAGS= -g -DNEED_SOLARIS_BOOLEAN ${INCFLAGS}
+
+DPADD=  ${LIBDEVDCTL} ${LIBZFS} ${LIBZFS_CORE} ${LIBUTIL} ${LIBGEOM} \
+       ${LIBBSDXML} ${LIBSBUF} ${LIBNVPAIR} ${LIBUUTIL}
+LIBADD=  devdctl zfs zfs_core util geom bsdxml sbuf nvpair uutil
+
+cscope:
+       find ${.CURDIR} -type f -a \( -name "*.[ch]" -o -name "*.cc" \) \
+            > ${.CURDIR}/cscope.files
+       cd ${.CURDIR} && cscope -buq ${INCFLAGS}

Added: head/cddl/usr.sbin/zfsd/callout.cc
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/cddl/usr.sbin/zfsd/callout.cc  Sat May 28 17:43:40 2016        
(r300906)
@@ -0,0 +1,219 @@
+/*-
+ * Copyright (c) 2011, 2012, 2013 Spectra Logic Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions, and the following disclaimer,
+ *    without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ *    substantially similar to the "NO WARRANTY" disclaimer below
+ *    ("Disclaimer") and any redistribution must be conditioned upon
+ *    including a substantially similar Disclaimer requirement for further
+ *    binary redistribution.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ *
+ * Authors: Justin T. Gibbs     (Spectra Logic Corporation)
+ *
+ * $FreeBSD$
+ */
+
+/**
+ * \file callout.cc
+ *
+ * \brief Implementation of the Callout class - multi-client
+ *        timer services built on top of the POSIX interval timer.
+ */
+
+#include <sys/time.h>
+
+#include <signal.h>
+#include <syslog.h>
+
+#include <climits>
+#include <list>
+#include <map>
+#include <string>
+
+#include <devdctl/guid.h>
+#include <devdctl/event.h>
+#include <devdctl/event_factory.h>
+#include <devdctl/consumer.h>
+#include <devdctl/exception.h>
+
+#include "callout.h"
+#include "vdev_iterator.h"
+#include "zfsd.h"
+#include "zfsd_exception.h"
+
+std::list<Callout *> Callout::s_activeCallouts;
+bool                Callout::s_alarmFired(false);
+/* Install the SIGALRM handler that the Callout timers depend on. */
+void
+Callout::Init()
+{
+       signal(SIGALRM,  Callout::AlarmSignalHandler);
+}
+
+bool
+Callout::Stop()
+{
+       if (!IsPending())
+               return (false); /* Not scheduled; nothing to cancel. */
+
+       for (std::list<Callout *>::iterator it(s_activeCallouts.begin());
+            it != s_activeCallouts.end(); it++) {
+               if (*it != this)
+                       continue;
+
+               it = s_activeCallouts.erase(it);
+               if (it != s_activeCallouts.end()) {
+
+                       /*
+                        * Intervals are relative to the preceding entry, so
+                        * the successor absorbs the removed entry's interval.
+                        * NOTE(review): timer not re-armed; check head removal.
+                        */
+                       timeradd(&(*it)->m_interval, &m_interval,
+                                &(*it)->m_interval);
+               }
+               break;
+       }
+       m_pending = false;
+       return (true);
+}
+
+bool
+Callout::Reset(const timeval &interval, CalloutFunc_t *func, void *arg)
+{
+       bool cancelled(false);
+       /* A zero interval is rejected rather than scheduled. */
+       if (!timerisset(&interval))
+               throw ZfsdException("Callout::Reset: interval of 0");
+       /* Drop any schedule this callout already holds. */
+       cancelled = Stop();
+
+       m_interval = interval;
+       m_func     = func;
+       m_arg      = arg;
+       m_pending  = true;
+       /* Find our slot, making m_interval relative to the entry before it. */
+       std::list<Callout *>::iterator it(s_activeCallouts.begin());
+       for (; it != s_activeCallouts.end(); it++) {
+
+               if (timercmp(&(*it)->m_interval, &m_interval, <=)) {
+                       /*
+                        * Decrease our interval by those that come
+                        * before us.
+                        */
+                       timersub(&m_interval, &(*it)->m_interval, &m_interval);
+               } else {
+                       /*
+                        * Account for the time between the newly
+                        * inserted event and those that follow.
+                        */
+                       timersub(&(*it)->m_interval, &m_interval,
+                                &(*it)->m_interval);
+                       break;
+               }
+       }
+       s_activeCallouts.insert(it, this);
+
+       /* Became the head of the list: arm the timer for our interval. */
+       if (s_activeCallouts.front() == this) {
+               itimerval timerval = { {0, 0}, m_interval };
+
+               setitimer(ITIMER_REAL, &timerval, NULL);
+       }
+
+       return (cancelled);
+}
+
+void
+Callout::AlarmSignalHandler(int)
+{
+       s_alarmFired = true;            /* Consumed by ExpireCallouts(). */
+       ZfsDaemon::WakeEventLoop();
+}
+
+void
+Callout::ExpireCallouts()
+{
+       if (!s_alarmFired)
+               return;
+       /* Clear the flag before any callback runs. */
+       s_alarmFired = false;
+       if (s_activeCallouts.empty()) {
+               /* Callout removal/SIGALRM race was lost. */
+               return;
+       }
+
+       /*
+        * Expire the first callout (the one we used to set the
+        * interval timer) as well as any callouts following that
+        * expire at the same time (have a zero interval from
+        * the callout before it).
+        */
+       do {
+               Callout *cur(s_activeCallouts.front());
+               s_activeCallouts.pop_front();
+               cur->m_pending = false;
+               cur->m_func(cur->m_arg);
+       } while (!s_activeCallouts.empty()
+             && timerisset(&s_activeCallouts.front()->m_interval) == 0);
+       /* Re-arm the timer for the new head of the list, if any. */
+       if (!s_activeCallouts.empty()) {
+               Callout *next(s_activeCallouts.front());
+               itimerval timerval = { { 0, 0 }, next->m_interval };
+
+               setitimer(ITIMER_REAL, &timerval, NULL);
+       }
+}
+
+timeval
+Callout::TimeRemaining() const
+{
+       /*
+        * Outline: Add the m_interval for each callout in s_activeCallouts
+        * ahead of this, except for the first callout.  Add to that the result
+        * of getitimer (That's because the first callout stores its original
+        * interval setting while the timer is ticking).
+        */
+       itimerval timervalToAlarm;
+       timeval timeToExpiry;
+       std::list<Callout *>::iterator it;
+
+       if (!IsPending()) {
+               timeToExpiry.tv_sec = INT_MAX;
+               timeToExpiry.tv_usec = 999999;  /*maximum normalized value*/
+               return (timeToExpiry);
+       }
+       /* Start from the time left on the running interval timer. */
+       timerclear(&timeToExpiry);
+       getitimer(ITIMER_REAL, &timervalToAlarm);
+       timeval& timeToAlarm = timervalToAlarm.it_value;
+       timeradd(&timeToExpiry, &timeToAlarm, &timeToExpiry);
+       /* NOTE(review): if this is the head, all later entries get added. */
+       it =s_activeCallouts.begin();
+       it++;   /*skip the first callout in the list*/
+       for (; it != s_activeCallouts.end(); it++) {
+               timeradd(&timeToExpiry, &(*it)->m_interval, &timeToExpiry);
+               if ((*it) == this)
+                       break;
+       }
+       return (timeToExpiry);
+}

Added: head/cddl/usr.sbin/zfsd/callout.h
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/cddl/usr.sbin/zfsd/callout.h   Sat May 28 17:43:40 2016        
(r300906)
@@ -0,0 +1,185 @@
+/*-
+ * Copyright (c) 2011, 2012, 2013 Spectra Logic Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions, and the following disclaimer,
+ *    without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ *    substantially similar to the "NO WARRANTY" disclaimer below
+ *    ("Disclaimer") and any redistribution must be conditioned upon
+ *    including a substantially similar Disclaimer requirement for further
+ *    binary redistribution.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ *
+ * Authors: Justin T. Gibbs     (Spectra Logic Corporation)
+ *
+ * $FreeBSD$
+ */
+
+/**
+ * \file callout.h
+ *
+ * \brief Interface for timer based callback services.
+ *
+ * Header requirements:
+ *
+ *     #include <sys/time.h>
+ *
+ *     #include <list>
+ */
+
+#ifndef _CALLOUT_H_
+#define _CALLOUT_H_
+
+/**
+ * \brief Type of the function callback from a Callout.
+ */
+typedef void CalloutFunc_t(void *);
+
+/**
+ * \brief Interface to a schedulable one-shot timer with the granularity
+ *        of the system clock (see setitimer(2)).
+ *
+ * Determination of callback expiration is triggered by the SIGALRM
+ * signal.  Callout callbacks are always delivered from Zfsd's event
+ * processing loop.
+ *
+ * Periodic actions can be triggered via the Callout mechanisms by
+ * resetting the Callout from within its callback.
+ */
+class Callout
+{
+public:
+
+       /**
+        * Initialize the Callout subsystem.
+        */
+       static void Init();
+
+       /**
+        * Function called (via SIGALRM) when our interval
+        * timer expires.
+        */
+       static void AlarmSignalHandler(int);
+
+       /**
+        * Execute callbacks for all callouts that have the same
+        * expiration time as the first callout in the list.
+        */
+       static void ExpireCallouts();
+
+       /** Constructor. */
+       Callout();
+
+       /**
+        * Returns true if callout has not been stopped, or deactivated
+        * since the last time the callout was reset.
+        * NOTE(review): no definition is visible here; confirm one exists.
+        */
+       bool IsActive() const;
+
+       /**
+        * Returns true if callout is still waiting to expire.
+        */
+       bool IsPending() const;
+
+       /**
+        * Disestablish a callout.  Returns true if it was pending.
+        */
+       bool Stop();
+
+       /**
+        * \brief Establish or change a timeout.
+        *
+        * \param interval  Timeval indicating the time which must elapse
+        *                  before this callout fires.
+        * \param func      Pointer to the callback function
+        * \param arg       Argument pointer to pass to callback function
+        *
+        * \return  Cancellation status.
+        *             true:  The previous callback was pending and therefore
+        *                    was cancelled.
+        *             false: The callout was not pending at the time of this
+        *                    reset request.
+        *          In all cases, a new callout is established.
+        */
+       bool  Reset(const timeval &interval, CalloutFunc_t *func, void *arg);
+
+       /**
+        * \brief Calculate the remaining time until this Callout's timer
+        *        expires.
+        *
+        * The return value will be slightly greater than the actual time to
+        * expiry.
+        *
+        * If the callout is not pending, returns { INT_MAX, 999999 }.
+        */
+       timeval TimeRemaining() const;
+
+private:
+       /**
+        * All active callouts sorted by expiration time.  The callout
+        * with the nearest expiration time is at the head of the list.
+        */
+       static std::list<Callout *> s_activeCallouts;
+
+       /**
+        * The interval timer has expired.  This variable is set from
+        * signal handler context and tested from Zfsd::EventLoop()
+        * context via ExpireCallouts().
+        */
+       static bool                 s_alarmFired;
+
+       /**
+        * Time, relative to others in the active list, until
+        * this callout is fired.
+        */
+       timeval                     m_interval;
+
+       /** Callback function argument. */
+       void                       *m_arg;
+
+       /**
+        * The callback function associated with this timer
+        * entry.
+        */
+       CalloutFunc_t              *m_func;
+
+       /** State of this callout. */
+       bool                        m_pending;
+};
+
+//- Callout public const methods ----------------------------------------------
+inline bool
+Callout::IsPending() const
+{
+       return (m_pending);     /* Set by Reset(); cleared on stop/expiry. */
+}
+
+//- Callout public methods ----------------------------------------------------
+inline
+Callout::Callout()
+ : m_arg(0),
+   m_func(NULL),
+   m_pending(false)
+{
+       timerclear(&m_interval);        /* No interval until Reset(). */
+}
+
+#endif /* _CALLOUT_H_ */

Added: head/cddl/usr.sbin/zfsd/case_file.cc
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/cddl/usr.sbin/zfsd/case_file.cc        Sat May 28 17:43:40 2016        
(r300906)
@@ -0,0 +1,1104 @@
+/*-
+ * Copyright (c) 2011, 2012, 2013, 2014, 2016 Spectra Logic Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions, and the following disclaimer,
+ *    without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ *    substantially similar to the "NO WARRANTY" disclaimer below
+ *    ("Disclaimer") and any redistribution must be conditioned upon
+ *    including a substantially similar Disclaimer requirement for further
+ *    binary redistribution.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ *
+ * Authors: Justin T. Gibbs     (Spectra Logic Corporation)
+ */
+
+/**
+ * \file case_file.cc
+ *
+ * We keep case files for any leaf vdev that is not in the optimal state.
+ * However, we only serialize to disk those events that need to be preserved
+ * across reboots.  For now, this is just a log of soft errors which we
+ * accumulate in order to mark a device as degraded.
+ */
+#include <sys/cdefs.h>
+#include <sys/time.h>
+
+#include <sys/fs/zfs.h>
+
+#include <dirent.h>
+#include <iomanip>
+#include <fstream>
+#include <functional>
+#include <sstream>
+#include <syslog.h>
+#include <unistd.h>
+
+#include <libzfs.h>
+
+#include <list>
+#include <map>
+#include <string>
+
+#include <devdctl/guid.h>
+#include <devdctl/event.h>
+#include <devdctl/event_factory.h>
+#include <devdctl/exception.h>
+#include <devdctl/consumer.h>
+
+#include "callout.h"
+#include "vdev_iterator.h"
+#include "zfsd_event.h"
+#include "case_file.h"
+#include "vdev.h"
+#include "zfsd.h"
+#include "zfsd_exception.h"
+#include "zpool_list.h"
+
+__FBSDID("$FreeBSD$");
+
+/*============================ Namespace Control 
=============================*/
+using std::auto_ptr;
+using std::hex;
+using std::ifstream;
+using std::stringstream;
+using std::setfill;
+using std::setw;
+
+using DevdCtl::Event;
+using DevdCtl::EventBuffer;
+using DevdCtl::EventFactory;
+using DevdCtl::EventList;
+using DevdCtl::Guid;
+using DevdCtl::ParseException;
+
+/*--------------------------------- CaseFile 
---------------------------------*/
+//- CaseFile Static Data 
-------------------------------------------------------
+
+CaseFileList  CaseFile::s_activeCases;
+const string  CaseFile::s_caseFilePath = "/var/db/zfsd/cases";
+const timeval CaseFile::s_removeGracePeriod = { 60 /*sec*/, 0 /*usec*/};
+
+//- CaseFile Static Public Methods 
---------------------------------------------
+CaseFile *
+CaseFile::Find(Guid poolGUID, Guid vdevGUID)
+{
+       for (CaseFileList::iterator curCase = s_activeCases.begin();
+            curCase != s_activeCases.end(); curCase++) {
+               /* Skip cases for any other pool or vdev. */
+               if ((*curCase)->PoolGUID() != poolGUID
+                || (*curCase)->VdevGUID() != vdevGUID)
+                       continue;
+
+               /*
+                * We only carry one active case per-vdev.
+                */
+               return (*curCase);
+       }
+       return (NULL);
+}
+
+CaseFile *
+CaseFile::Find(const string &physPath)
+{
+       CaseFile *result = NULL;
+       /* Scan every case so that duplicate matches are reported. */
+       for (CaseFileList::iterator curCase = s_activeCases.begin();
+            curCase != s_activeCases.end(); curCase++) {
+
+               if ((*curCase)->PhysicalPath() != physPath)
+                       continue;
+
+               if (result != NULL) {
+                       syslog(LOG_WARNING, "Multiple casefiles found for "
+                           "physical path %s.  "
+                           "This is most likely a bug in zfsd",
+                           physPath.c_str());
+               }
+               result = *curCase;      /* Last match wins. */
+       }
+       return (result);
+}
+
+/* 'next' is saved first, presumably since ReEvaluate() may close a case. */
+void
+CaseFile::ReEvaluateByGuid(Guid poolGUID, const ZfsEvent &event)
+{
+       CaseFileList::iterator casefile;
+       for (casefile = s_activeCases.begin(); casefile != 
s_activeCases.end();){
+               CaseFileList::iterator next = casefile;
+               next++;
+               if (poolGUID == (*casefile)->PoolGUID())
+                       (*casefile)->ReEvaluate(event);
+               casefile = next;
+       }
+}
+
+CaseFile &
+CaseFile::Create(Vdev &vdev)
+{
+       CaseFile *activeCase;
+       /* Reuse the existing case for this vdev if there is one. */
+       activeCase = Find(vdev.PoolGUID(), vdev.GUID());
+       if (activeCase == NULL)
+               activeCase = new CaseFile(vdev);
+
+       return (*activeCase);
+}
+
+void
+CaseFile::DeSerialize()
+{
+       struct dirent **caseFiles;
+
+       int numCaseFiles(scandir(s_caseFilePath.c_str(), &caseFiles,
+                        DeSerializeSelector, /*compar*/NULL));
+       /* On a -1 result there is nothing to load and nothing to free. */
+       if (numCaseFiles == -1)
+               return;
+       if (numCaseFiles == 0) {
+               free(caseFiles);
+               return;
+       }
+
+       for (int i = 0; i < numCaseFiles; i++) {
+               /* Load the case, then release the entry from scandir(3). */
+               DeSerializeFile(caseFiles[i]->d_name);
+               free(caseFiles[i]);
+       }
+       free(caseFiles);
+}
+/* Call Log() on every active case file. */
+void
+CaseFile::LogAll()
+{
+       for (CaseFileList::iterator curCase = s_activeCases.begin();
+            curCase != s_activeCases.end(); curCase++)
+               (*curCase)->Log();
+}
+
+void
+CaseFile::PurgeAll()
+{
+       /*
+        * Serialize casefiles before deleting them so that they can be reread
+        * and revalidated during BuildCaseFiles.  CaseFiles remove themselves
+        * from this list on destruction, which is what ends the loop below.
+        */
+       while (s_activeCases.size() != 0) {
+               CaseFile *casefile = s_activeCases.front();
+               casefile->Serialize();
+               delete casefile;
+       }
+
+}
+
+//- CaseFile Public Methods 
----------------------------------------------------
+bool
+CaseFile::RefreshVdevState()
+{
+       ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
+       zpool_handle_t *casePool(zpl.empty() ? NULL : zpl.front());
+       if (casePool == NULL)
+               return (false);         /* The pool could not be found. */
+
+       Vdev vd(casePool, CaseVdev(casePool));
+       if (vd.DoesNotExist())
+               return (false);
+       /* Cache the vdev's current state and physical path. */
+       m_vdevState    = vd.State();
+       m_vdevPhysPath = vd.PhysicalPath();
+       return (true);
+}
+
+bool
+CaseFile::ReEvaluate(const string &devPath, const string &physPath, Vdev *vdev)
+{
+       ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
+       zpool_handle_t *pool(zpl.empty() ? NULL : zpl.front());
+
+       if (pool == NULL || !RefreshVdevState()) {
+               /*
+                * The pool or vdev for this case file is no longer
+                * part of the configuration.  This can happen
+                * if we process a device arrival notification
+                * before seeing the ZFS configuration change
+                * event.
+                */
+               syslog(LOG_INFO,
+                      "CaseFile::ReEvaluate(%s,%s) Pool/Vdev unconfigured.  "
+                      "Closing\n",
+                      PoolGUIDString().c_str(),
+                      VdevGUIDString().c_str());
+               Close();
+
+               /*
+                * Since this event was not used to close this
+                * case, do not report it as consumed.
+                */
+               return (/*consumed*/false);
+       }
+
+       if (VdevState() > VDEV_STATE_CANT_OPEN) {
+               /*
+                * For now, newly discovered devices only help for
+                * devices that are missing.  In the future, we might
+                * use a newly inserted spare to replace a degraded
+                * or faulted device.
+                */
+               syslog(LOG_INFO, "CaseFile::ReEvaluate(%s,%s): Pool/Vdev 
ignored",
+                   PoolGUIDString().c_str(), VdevGUIDString().c_str());
+               return (/*consumed*/false);
+       }
+
+       if (vdev != NULL
+        && vdev->PoolGUID() == m_poolGUID
+        && vdev->GUID() == m_vdevGUID) {
+               /* NOTE(review): zpool_vdev_online()'s result is not checked. */
+               zpool_vdev_online(pool, vdev->GUIDString().c_str(),
+                                 ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE,
+                                 &m_vdevState);
+               syslog(LOG_INFO, "Onlined vdev(%s/%s:%s).  State now %s.\n",
+                      zpool_get_name(pool), vdev->GUIDString().c_str(),
+                      devPath.c_str(),
+                      zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
+
+               /*
+                * Check the vdev state post the online action to see
+                * if we can retire this case.
+                */
+               CloseIfSolved();
+
+               return (/*consumed*/true);
+       }
+
+       /*
+        * If the auto-replace policy is enabled, and we have physical
+        * path information, try a physical path replacement.
+        */
+       if (zpool_get_prop_int(pool, ZPOOL_PROP_AUTOREPLACE, NULL) == 0) {
+               syslog(LOG_INFO,
+                      "CaseFile(%s:%s:%s): AutoReplace not set.  "
+                      "Ignoring device insertion.\n",
+                      PoolGUIDString().c_str(),
+                      VdevGUIDString().c_str(),
+                      zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
+               return (/*consumed*/false);
+       }
+
+       if (PhysicalPath().empty()) {
+               syslog(LOG_INFO,
+                      "CaseFile(%s:%s:%s): No physical path information.  "
+                      "Ignoring device insertion.\n",
+                      PoolGUIDString().c_str(),
+                      VdevGUIDString().c_str(),
+                      zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
+               return (/*consumed*/false);
+       }
+
+       if (physPath != PhysicalPath()) {
+               syslog(LOG_INFO,
+                      "CaseFile(%s:%s:%s): Physical path mismatch.  "
+                      "Ignoring device insertion.\n",
+                      PoolGUIDString().c_str(),
+                      VdevGUIDString().c_str(),
+                      zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
+               return (/*consumed*/false);
+       }
+
+       /* Write a label on the newly inserted disk. */
+       if (zpool_label_disk(g_zfsHandle, pool, devPath.c_str()) != 0) {
+               syslog(LOG_ERR,
+                      "Replace vdev(%s/%s) by physical path (label): %s: %s\n",
+                      zpool_get_name(pool), VdevGUIDString().c_str(),
+                      libzfs_error_action(g_zfsHandle),
+                      libzfs_error_description(g_zfsHandle));
+               return (/*consumed*/false);
+       }
+       /* Labelled successfully; hand the new disk to Replace(). */
+       syslog(LOG_INFO, "CaseFile::ReEvaluate(%s/%s): Replacing with %s",
+           PoolGUIDString().c_str(), VdevGUIDString().c_str(),
+           devPath.c_str());
+       return (Replace(VDEV_TYPE_DISK, devPath.c_str(), /*isspare*/false));
+}
+
+/*
+ * Re-evaluate this case against a ZFS event and report whether the
+ * event was consumed.
+ *
+ * The event is examined first by its "type" and then by its "class":
+ *  - vdev_remove and pool_destroy types close the case outright.
+ *  - A config_sync type refreshes the vdev state and, when the vdev
+ *    is below HEALTHY, attempts to activate a spare.
+ *  - A removed class purges tentative events, attempts to activate a
+ *    spare, and requests a system rescan.
+ *  - A statechange class attempts to activate a spare for a FAULTED,
+ *    DEGRADED, or CANT_OPEN vdev and is always consumed.
+ *  - io and checksum ereports are queued as tentative events.
+ *
+ * Unless the case was closed early, CloseIfSolved() runs last, and
+ * closing the case there also counts as consuming the event.
+ */
+bool
+CaseFile::ReEvaluate(const ZfsEvent &event)
+{
+       bool eventUsed(false);
+
+       if (event.Value("type") == "misc.fs.zfs.vdev_remove"
+        || event.Value("type") == "misc.fs.zfs.pool_destroy") {
+               /*
+                * Either the vdev this case tracks has left the
+                * configuration or its pool has been destroyed.
+                * Nothing remains to be solved, so discard the case.
+                */
+               Close();
+
+               return (/*consumed*/true);
+       }
+
+       if (event.Value("type") == "misc.fs.zfs.config_sync") {
+               RefreshVdevState();
+               if (VdevState() < VDEV_STATE_HEALTHY)
+                       eventUsed = ActivateSpare();
+       }
+
+       if (event.Value("class") == "resource.fs.zfs.removed") {
+               if (!RefreshVdevState()) {
+                       /*
+                        * Our pool or vdev has dropped out of the
+                        * configuration.  A device arrival notification
+                        * processed ahead of the ZFS configuration
+                        * change event can lead here.
+                        */
+                       syslog(LOG_INFO,
+                              "CaseFile::ReEvaluate(%s,%s) Pool/Vdev "
+                              "unconfigured.  Closing\n",
+                              PoolGUIDString().c_str(),
+                              VdevGUIDString().c_str());
+
+                       /*
+                        * Closing immediately keeps the system rescan
+                        * from spending cycles on this case.
+                        */
+                       Close();
+
+                       /*
+                        * The event itself did not close the case, so
+                        * it is reported as unconsumed.
+                        */
+                       return (/*consumed*/false);
+               }
+
+               /*
+                * Tentative I/O error events for this case were most
+                * likely a side effect of the hot-unplug; drop them.
+                */
+               PurgeTentativeEvents();
+
+               /*
+                * The event is consumed only when a spare was
+                * successfully activated.  Otherwise it stays on the
+                * unconsumed events list so that a spare added to this
+                * pool later might still be able to close the case.
+                */
+               eventUsed = ActivateSpare();
+
+               /*
+                * A recently arrived drive might solve this case, so
+                * ask for a rescan of the drives in the system.
+                */
+               ZfsDaemon::RequestSystemRescan();
+       } else if (event.Value("class") == "resource.fs.zfs.statechange") {
+               RefreshVdevState();
+
+               /*
+                * Only a DEGRADED, FAULTED, or UNAVAIL vdev warrants a
+                * hotspare attempt; any other state is ignored.
+                */
+               switch (VdevState()) {
+               case VDEV_STATE_FAULTED:
+               case VDEV_STATE_DEGRADED:
+               case VDEV_STATE_CANT_OPEN:
+                       (void) ActivateSpare();
+                       break;
+               default:
+                       break;
+               }
+               eventUsed = true;
+       } else if (event.Value("class") == "ereport.fs.zfs.io"
+               || event.Value("class") == "ereport.fs.zfs.checksum") {
+               /*
+                * Keep a copy of the error report as a tentative event
+                * and register a callout for it.
+                */
+               m_tentativeEvents.push_front(event.DeepCopy());
+               RegisterCallout(event);
+               eventUsed = true;
+       }
+
+       /* Closing a solved case also counts as consuming the event. */
+       const bool caseClosed(CloseIfSolved());
+
+       return (caseClosed || eventUsed);
+}
+
+
+bool
+CaseFile::ActivateSpare() {
+       nvlist_t        *config, *nvroot;
+       nvlist_t       **spares;
+       char            *devPath, *vdev_type;
+       const char      *poolname;
+       u_int            nspares, i;
+       int              error;
+
+       ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
+       zpool_handle_t  *zhp(zpl.empty() ? NULL : zpl.front());
+       if (zhp == NULL) {
+               syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool "
+                      "for pool_guid %"PRIu64".", (uint64_t)m_poolGUID);
+               return (false);
+       }
+       poolname = zpool_get_name(zhp);
+       config = zpool_get_config(zhp, NULL);
+       if (config == NULL) {
+               syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool "
+                      "config for pool %s", poolname);
+               return (false);
+       }
+       error = nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot);
+       if (error != 0){
+               syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find vdev "
+                      "tree for pool %s", poolname);
+               return (false);
+       }
+       nspares = 0;
+       nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares,
+                                  &nspares);
+       if (nspares == 0) {
+               /* The pool has no spares configured */
+               syslog(LOG_INFO, "CaseFile::ActivateSpare: "
+                      "No spares available for pool %s", poolname);
+               return (false);
+       }
+       for (i = 0; i < nspares; i++) {
+               uint64_t    *nvlist_array;
+               vdev_stat_t *vs;
+               uint_t       nstats;
+
+               if (nvlist_lookup_uint64_array(spares[i],
+                   ZPOOL_CONFIG_VDEV_STATS, &nvlist_array, &nstats) != 0) {
+                       syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not "
+                              "find vdev stats for pool %s, spare %d",
+                              poolname, i);
+                       return (false);
+               }
+               vs = reinterpret_cast<vdev_stat_t *>(nvlist_array);
+
+               if ((vs->vs_aux != VDEV_AUX_SPARED)
+                && (vs->vs_state == VDEV_STATE_HEALTHY)) {
+                       /* We found a usable spare */
+                       break;
+               }
+       }
+
+       if (i == nspares) {
+               /* No available spares were found */
+               return (false);

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
_______________________________________________
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to