Hi

This patch adds two new services:
mr: monitoring service (uses statgrab)
wd: watchdog service

The monitoring service uses libstatgrab to get memory and load
stats (it can do a lot more).
1) It puts the current value into a key "current" alongside the max value.
2) It compares the current value to the max:
  If current > max it sets state = "failed"
  Else it sets state = "good"

The watchdog service only does anything if:
1) you have a /dev/watchdog
2) there are resources with recovery == watchdog

It then opens /dev/watchdog and starts a timer set to half the
watchdog's timeout.
Each time the timer fires it only tickles the watchdog if no
/resources/*/*/state is "failed".

As this is an early patch, the bit that writes state=[good|failed] is
commented out to make testing easier.

To test (on a VM where you don't mind the machine shutting down):
Add the following to your config:
service {
        name: corosync_mr
        ver: 0
}
service {
        name: corosync_wd
        ver: 0
}
resources {
  system {
    memory_used {
      state: unknown
      # max here is the percent of total mem used
      max: 80
      recovery: watchdog
    }
    load_15min {
      state: unknown
      # max here is the actual loadaverage
      max: 20
      recovery: watchdog
    }
  }
}

Then
$ modprobe softdog
$ /etc/init.d/corosync start

To view the stats
$ corosync-objctl resources.

To trigger a watchdog timeout
$ corosync-objctl -w resources.system.memory_used.state=failed

Regards
Angus

Signed-off-by: Angus Salkeld <[email protected]>
---
 configure.ac                |   30 +++-
 exec/Makefile.am            |    2 +-
 include/corosync/corodefs.h |    4 +-
 services/Makefile.am        |    5 +-
 services/mr.c               |  341 +++++++++++++++++++++++++++++++++++
 services/wd.c               |  420 +++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 791 insertions(+), 11 deletions(-)
 create mode 100644 services/mr.c
 create mode 100644 services/wd.c

diff --git a/configure.ac b/configure.ac
index 59f10fb..99b5666 100644
--- a/configure.ac
+++ b/configure.ac
@@ -182,19 +182,19 @@ 
CONFDB_SONAME="${SOMAJOR}.${CONFDB_SOMINOR}.${CONFDB_SOMICRO}"
 
 # local options
 AC_ARG_ENABLE([ansi],
-       [  --enable-ansi           : force to build with ANSI standards. ],
+       [  --enable-ansi                   : force to build with ANSI 
standards. ],
        [ default="no" ])
 
 AC_ARG_ENABLE([fatal-warnings],
-       [  --enable-fatal-warnings : enable fatal warnings. ],
+       [  --enable-fatal-warnings         : enable fatal warnings. ],
        [ default="no" ])
 
 AC_ARG_ENABLE([debug],
-       [ --enable-debug          : enable debug build. ],
+       [  --enable-debug                  : enable debug build. ],
        [ default="no" ])
 
 AC_ARG_ENABLE([coverage],
-       [  --enable-coverage       : coverage analysis of the codebase. ],
+       [  --enable-coverage               : coverage analysis of the codebase. 
],
        [ default="no" ])
 
 AC_ARG_ENABLE([small-memory-footprint],
@@ -202,20 +202,26 @@ AC_ARG_ENABLE([small-memory-footprint],
        [ default="no" ])
 
 AC_ARG_ENABLE([nss],
-       [  --enable-nss            : Network Security Services encryption. ],,
+       [  --enable-nss                    : Network Security Services 
encryption. ],,
        [ enable_nss="yes" ])
 
 AC_ARG_ENABLE([testagents],
-       [  --enable-testagents            : Install Test Agents. ],,
+       [  --enable-testagents             : Install Test Agents. ],,
        [ default="no" ])
 
 AC_ARG_ENABLE([rdma],
-       [  --enable-rdma           : Infiniband RDMA transport support ],,
+       [  --enable-rdma                   : Infiniband RDMA transport support 
],,
        [ enable_rdma="no" ])
 AM_CONDITIONAL(BUILD_RDMA, test x$enable_rdma = xyes)
 
+AC_ARG_ENABLE([statgrab],
+       [  --enable-statgrab               : statgrab resource monitoring ],,
+       [ default="no" ])
+AM_CONDITIONAL(HAVE_STATGRAB, test x$enable_statgrab = xyes)
+
+
 AC_ARG_ENABLE([augeas],
-       [  --enable-augeas           : Install the augeas lens for 
corosync.conf ],,
+       [  --enable-augeas                 : Install the augeas lens for 
corosync.conf ],,
        [ enable_augeas="no" ])
 AM_CONDITIONAL(INSTALL_AUGEAS, test x$enable_augeas = xyes)
 
@@ -349,6 +355,14 @@ if test "x${enable_rdma}" = xyes; then
        PACKAGE_FEATURES="$PACKAGE_FEATURES rdma"
 fi
 
+if test "x${enable_statgrab}" = xyes; then
+       # Stop at configure time when libstatgrab is requested but missing
+       # instead of failing later while compiling or linking services/mr.c
+       AC_CHECK_LIB([statgrab], [sg_get_mem_stats], [],
+               [AC_MSG_ERROR([--enable-statgrab requires libstatgrab])])
+       AC_CHECK_HEADERS([statgrab.h], [],
+               [AC_MSG_ERROR([--enable-statgrab requires statgrab.h])])
+       statgrab_LIBS="-lstatgrab"
+       AC_SUBST([statgrab_LIBS])
+       PACKAGE_FEATURES="$PACKAGE_FEATURES statgrab"
+fi
+
 if test "x${enable_augeas}" = xyes; then
        PACKAGE_FEATURES="$PACKAGE_FEATURES augeas"
 fi
diff --git a/exec/Makefile.am b/exec/Makefile.am
index f367f29..7c207c3 100644
--- a/exec/Makefile.am
+++ b/exec/Makefile.am
@@ -59,7 +59,7 @@ libcoroipcs_a_SOURCES = $(COROIPCS_SRC)
 corosync_SOURCES       = main.c util.c sync.c apidef.c service.c \
                          timer.c totemconfig.c mainconfig.c quorum.c 
schedwrk.c \
                          ../lcr/lcr_ifact.c evil.c syncv2.c
-corosync_LDADD         = -ltotem_pg -llogsys -lcoroipcs
+corosync_LDADD         = -ltotem_pg -llogsys -lcoroipcs $(statgrab_LIBS)
 corosync_DEPENDENCIES  = libtotem_pg.so.$(SONAME) liblogsys.so.$(SONAME) 
libcoroipcs.so.$(SONAME)
 corosync_LDFLAGS       = $(OS_DYFLAGS) -L./
 
diff --git a/include/corosync/corodefs.h b/include/corosync/corodefs.h
index 57923e2..8a81327 100644
--- a/include/corosync/corodefs.h
+++ b/include/corosync/corodefs.h
@@ -59,7 +59,9 @@ enum corosync_service_types {
        NTF_SERVICE = 16,
        AMF_V2_SERVICE = 17,
        TST_SV1_SERVICE = 18,
-       TST_SV2_SERVICE = 19
+       TST_SV2_SERVICE = 19,
+       MR_SERVICE = 20,
+       WD_SERVICE = 21
 };
 
 #ifdef HAVE_SMALL_MEMORY_FOOTPRINT
diff --git a/services/Makefile.am b/services/Makefile.am
index bb63336..7c17409 100644
--- a/services/Makefile.am
+++ b/services/Makefile.am
@@ -37,7 +37,10 @@ INCLUDES             = -I$(top_builddir)/include 
-I$(top_srcdir)/include \
                          -I$(top_builddir)/include/corosync \
                          -I$(top_srcdir)/include/corosync
 
-SERVICE_LCRSO          = evs cfg cpg confdb pload
+SERVICE_LCRSO          = evs cfg cpg confdb pload wd
+if HAVE_STATGRAB
+SERVICE_LCRSO          += mr
+endif
 
 QUORUM_LCRSO           = votequorum testquorum
 
diff --git a/services/mr.c b/services/mr.c
new file mode 100644
index 0000000..39cbddf
--- /dev/null
+++ b/services/mr.c
@@ -0,0 +1,341 @@
+/*
+ * Copyright (c) 2010 Red Hat, Inc.
+ *
+ * All rights reserved.
+ *
+ * Author: Angus Salkeld <[email protected]>
+ *
+ * This software licensed under BSD license, the text of which follows:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ * - Neither the name of the MontaVista Software, Inc. nor the names of its
+ *   contributors may be used to endorse or promote products derived from this
+ *   software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <config.h>
+
+#include <unistd.h>
+#include <statgrab.h>
+
+#include <corosync/corotypes.h>
+#include <corosync/corodefs.h>
+#include <corosync/lcr/lcr_comp.h>
+#include <corosync/engine/coroapi.h>
+#include <corosync/list.h>
+#include <corosync/engine/logsys.h>
+
+
+LOGSYS_DECLARE_SUBSYS ("MR");
+
+/*
+ * Service Interfaces required by service_message_handler struct
+ */
+static int mr_exec_init_fn (
+       struct corosync_api_v1 *corosync_api);
+
+static void mr_confchg_fn (
+       enum totem_configuration_type configuration_type,
+       const unsigned int *member_list, size_t member_list_entries,
+       const unsigned int *left_list, size_t left_list_entries,
+       const unsigned int *joined_list, size_t joined_list_entries,
+       const struct memb_ring_id *ring_id);
+
+
+static int mr_lib_init_fn (void *conn);
+
+static int mr_lib_exit_fn (void *conn);
+
+static struct corosync_api_v1 *api;
+
+static hdb_handle_t memory_used_obj;
+static hdb_handle_t load_15min_obj;
+static pthread_t mr_poll_thread;
+
+/*
+ * Service engine descriptor for the monitoring service.  It is handed out
+ * by mr_get_service_engine_ver0().  The service registers no library or
+ * executive message handlers (both engine tables are NULL with a count of 0)
+ * and sets no exec_exit_fn.
+ */
+struct corosync_service_engine mr_service_engine = {
+       .name                   = "corosync monitoring and recovery service",
+       .id                     = MR_SERVICE,
+       .priority               = 1,
+       .private_data_size      = 0,
+       .flow_control           = CS_LIB_FLOW_CONTROL_REQUIRED,
+       .lib_init_fn            = mr_lib_init_fn,
+       .lib_exit_fn            = mr_lib_exit_fn,
+       .lib_engine             = NULL,
+       .lib_engine_count       = 0,
+       .exec_engine            = NULL,
+       .exec_engine_count      = 0,
+       .confchg_fn             = mr_confchg_fn,
+       .exec_init_fn           = mr_exec_init_fn,
+       .exec_dump_fn           = NULL,
+       .sync_mode              = CS_SYNC_V2
+};
+
+static DECLARE_LIST_INIT (confchg_notify);
+
+/*
+ * Dynamic loading descriptor
+ */
+
+static struct corosync_service_engine *mr_get_service_engine_ver0 (void);
+
+static struct corosync_service_engine_iface_ver0 mr_service_engine_iface = {
+       .corosync_get_service_engine_ver0       = mr_get_service_engine_ver0
+};
+
+static struct lcr_iface corosync_mr_ver0[1] = {
+       {
+               .name                   = "corosync_mr",
+               .version                = 0,
+               .versions_replace       = 0,
+               .versions_replace_count = 0,
+               .dependencies           = 0,
+               .dependency_count       = 0,
+               .constructor            = NULL,
+               .destructor             = NULL,
+               .interfaces             = NULL,
+       }
+};
+
+static struct lcr_comp mr_comp_ver0 = {
+       .iface_count    = 1,
+       .ifaces         = corosync_mr_ver0
+};
+
+/*
+ * Accessor published through mr_service_engine_iface; returns the address
+ * of the statically allocated mr_service_engine descriptor.
+ */
+static struct corosync_service_engine *mr_get_service_engine_ver0 (void)
+{
+       return (&mr_service_engine);
+}
+
+/*
+ * Attach the service engine interface to the corosync_mr descriptor and
+ * register the component with LCR.  Outside Solaris builds the function is
+ * marked as a constructor; on Solaris it is an exported symbol instead
+ * (presumably invoked by the loader - TODO confirm).
+ */
+#ifdef COROSYNC_SOLARIS
+void corosync_lcr_component_register (void);
+
+void corosync_lcr_component_register (void) {
+#else
+__attribute__ ((constructor))
+static void corosync_lcr_component_register (void) {
+#endif
+       lcr_interfaces_set (&corosync_mr_ver0[0], &mr_service_engine_iface);
+
+       lcr_component_register (&mr_comp_ver0);
+}
+
+/*
+ * Publish a freshly sampled value for one monitored resource and compare it
+ * with the configured limit.
+ *
+ * resourse       - objdb handle of the resource object.
+ * new_value      - points to a double when type is OBJDB_VALUETYPE_DOUBLE,
+ *                  otherwise it is read as a uint32_t.
+ * new_value_size - size in bytes of the object new_value points to.
+ * type           - selects how new_value and the "max" key are compared.
+ *
+ * The sample is stored under the "current" key.  The "max" key is parsed
+ * from its string form; if the key is missing only "current" is updated.
+ * NOTE(review): max_str is handed to strtod/strtol, which assumes objdb
+ * returns it NUL-terminated - confirm.
+ */
+static void update_resource (hdb_handle_t resourse,
+       void *new_value, size_t new_value_size, objdb_value_types_t type)
+{
+       char *max_str;
+       size_t max_str_len;
+       objdb_value_types_t max_type;
+       const char *status = "good";
+
+       api->object_key_replace (resourse,
+               "current", strlen("current"),
+               new_value, new_value_size);
+
+       if (api->object_key_get_typed (resourse, "max", (void**)&max_str,
+               &max_str_len, &max_type) != 0) {
+               return;
+       }
+
+       switch (type) {
+       case OBJDB_VALUETYPE_DOUBLE:
+               if (*((double*)new_value) > strtod (max_str, NULL)) {
+                       status = "failed";
+               }
+               break;
+
+       default:
+               /* widen both sides to a signed 64 bit type so a 32 bit long
+                * is not silently converted to unsigned for the comparison
+                */
+               if ((long long)*((uint32_t*)new_value) >
+                       (long long)strtol (max_str, NULL, 0)) {
+                       status = "failed";
+               }
+               break;
+       }
+
+       /* The state write below is disabled on purpose while the patch is
+        * being tested, so the computed status is not stored yet.
+        */
+       (void)status;
+/*
+       api->object_key_replace (resourse,
+               "state", strlen("state"),
+               status, strlen(status));
+*/
+}
+
+/*
+ * Body of the polling thread started by mr_exec_init_fn().
+ *
+ * Every 30 seconds it samples libstatgrab and passes two values to
+ * update_resource():
+ *   memory_used - percentage of RAM plus swap that is not free
+ *   load_15min  - the 15 minute load average
+ *
+ * The loop never terminates; the argument is unused.
+ */
+static void *mr_thread_handler (void * unused)
+{
+       sg_mem_stats *mem_stats;
+       sg_swap_stats *swap_stats;
+       sg_load_stats *load_stats;
+       long long total, freemem;
+       uint32_t new_value;
+
+       sg_init();
+
+       while (1) {
+               mem_stats = sg_get_mem_stats();
+               swap_stats = sg_get_swap_stats();
+
+               if (mem_stats == NULL || swap_stats == NULL) {
+                       log_printf (LOGSYS_LEVEL_ERROR,
+                               "Unable to get VM stats: %s\n",
+                               sg_str_error(sg_get_error()));
+               }
+               else {
+                       total = mem_stats->total + swap_stats->total;
+                       freemem = mem_stats->free + swap_stats->free;
+                       /* a zero total would divide by zero below, so the
+                        * sample is skipped for this round
+                        */
+                       if (total > 0) {
+                               new_value = ((total - freemem) * 100) / total;
+                               update_resource (memory_used_obj,
+                                       &new_value, sizeof(new_value),
+                                       OBJDB_VALUETYPE_UINT32);
+                       }
+               }
+
+               load_stats = sg_get_load_stats ();
+               if (load_stats == NULL) {
+                       log_printf (LOGSYS_LEVEL_ERROR,
+                               "Unable to get load stats: %s\n",
+                               sg_str_error(sg_get_error()));
+               }
+               else {
+                       update_resource (load_15min_obj,
+                               &load_stats->min15, sizeof (load_stats->min15),
+                               OBJDB_VALUETYPE_DOUBLE);
+               }
+
+               sleep(30);
+       }
+       return NULL;
+}
+
+/*
+ * Look up the first child of parent_object_handle with the given name and
+ * create it if no such child exists.
+ *
+ * parent_object_handle - objdb object to search below.
+ * object_handle        - receives the handle that was found or created.
+ * object_name          - name to look for.
+ * object_name_len      - length of object_name in bytes.
+ *
+ * Returns 0 if an existing object was found, the result of
+ * api->object_create() if one had to be created, or -1 if the search could
+ * not be started.
+ */
+static int object_find_or_create (
+       hdb_handle_t parent_object_handle,
+       hdb_handle_t *object_handle,
+       const void *object_name,
+       size_t object_name_len)
+{
+       hdb_handle_t obj_finder;
+       hdb_handle_t obj;
+       int ret;
+
+       /* without a valid finder there is nothing to iterate or destroy */
+       if (api->object_find_create (
+               parent_object_handle,
+               object_name,
+               object_name_len,
+               &obj_finder) != 0) {
+               return -1;
+       }
+
+       if (api->object_find_next (obj_finder, &obj) == 0) {
+               /* found it */
+               *object_handle = obj;
+               ret = 0;
+       }
+       else {
+               ret = api->object_create (parent_object_handle,
+                       object_handle,
+                       object_name, object_name_len);
+       }
+
+       api->object_find_destroy (obj_finder);
+       return ret;
+}
+
+
+/*
+ * Service start-up hook (the .exec_init_fn of mr_service_engine).
+ *
+ * Finds or creates /resources/system/memory_used and
+ * /resources/system/load_15min in objdb, adds a zeroed "current" key to each
+ * and starts the thread that refreshes them.
+ *
+ * Returns 0 on success, -1 if one of the objects could not be found or
+ * created or if the polling thread could not be started.
+ */
+static int mr_exec_init_fn (
+       struct corosync_api_v1 *corosync_api)
+{
+       hdb_handle_t resources_obj;
+       hdb_handle_t system_obj;
+       uint32_t zero_32 = 0;
+       double zero_double = 0;
+
+       /* the subsystem has to be initialised before the first log_printf */
+#ifdef COROSYNC_SOLARIS
+       logsys_subsys_init();
+#endif
+       log_printf (LOGSYS_LEVEL_INFO, "%s\n", __func__);
+       api = corosync_api;
+
+       /* each handle is only used as a parent once it is known to be valid */
+       if (object_find_or_create (OBJECT_PARENT_HANDLE,
+               &resources_obj,
+               "resources", strlen ("resources")) != 0) {
+               goto objdb_failed;
+       }
+       if (object_find_or_create (resources_obj,
+               &system_obj,
+               "system", strlen ("system")) != 0) {
+               goto objdb_failed;
+       }
+       if (object_find_or_create (system_obj,
+               &memory_used_obj,
+               "memory_used", strlen ("memory_used")) != 0) {
+               goto objdb_failed;
+       }
+       if (object_find_or_create (system_obj,
+               &load_15min_obj,
+               "load_15min", strlen ("load_15min")) != 0) {
+               goto objdb_failed;
+       }
+
+       api->object_key_create_typed (memory_used_obj,
+               "current", &zero_32,
+               sizeof (zero_32), OBJDB_VALUETYPE_UINT32);
+       api->object_key_create_typed (load_15min_obj,
+               "current", &zero_double,
+               sizeof (zero_double), OBJDB_VALUETYPE_DOUBLE);
+
+/*
+ * Object layout.  Only memory_used and load_15min are populated by this
+ * file so far.
+ *
+ * /resources/
+ *     system/
+ *             memory_used/
+ *                     max = <X>%
+ *                     current = ()
+ *             load_15min/
+ *                     max = <X>%
+ *             used_inodes/
+ *                     max = <X>%
+ *             used_blocks/
+ *                     max = <X>%
+ *             net_in_errors/
+ *                     max = <X>%
+ *             net_out_errors/
+ *                     max = <X>%
+ *
+ *     processes/
+ *             <name|pid>
+ */
+       if (pthread_create (&mr_poll_thread, NULL, mr_thread_handler, NULL) != 0) {
+               log_printf (LOGSYS_LEVEL_ERROR,
+                       "Unable to start the resource polling thread\n");
+               return -1;
+       }
+
+       return 0;
+
+objdb_failed:
+       log_printf (LOGSYS_LEVEL_ERROR,
+               "Unable to find or create the resources objects\n");
+       return -1;
+}
+
+/*
+ * Membership configuration-change callback (the .confchg_fn of
+ * mr_service_engine).  Currently does nothing with the change.
+ */
+static void mr_confchg_fn (
+       enum totem_configuration_type configuration_type,
+       const unsigned int *member_list, size_t member_list_entries,
+       const unsigned int *left_list, size_t left_list_entries,
+       const unsigned int *joined_list, size_t joined_list_entries,
+       const struct memb_ring_id *ring_id)
+{
+}
+
+/*
+ * Library connection set-up hook (the .lib_init_fn of mr_service_engine).
+ * Keeps no per-connection state and always returns 0.
+ */
+static int mr_lib_init_fn (void *conn)
+{
+       return (0);
+}
+
+/*
+ * Library connection tear-down hook (the .lib_exit_fn of mr_service_engine).
+ * Has nothing to release and always returns 0.
+ */
+static int mr_lib_exit_fn (void *conn)
+{
+       return (0);
+}
+
diff --git a/services/wd.c b/services/wd.c
new file mode 100644
index 0000000..3fb1db0
--- /dev/null
+++ b/services/wd.c
@@ -0,0 +1,420 @@
+/*
+ * Copyright (c) 2010 Red Hat, Inc.
+ *
+ * All rights reserved.
+ *
+ * Author: Angus Salkeld <[email protected]>
+ *
+ * This software licensed under BSD license, the text of which follows:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ * - Neither the name of the MontaVista Software, Inc. nor the names of its
+ *   contributors may be used to endorse or promote products derived from this
+ *   software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <config.h>
+
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <linux/types.h>
+#include <linux/watchdog.h>
+
+#include <corosync/corotypes.h>
+#include <corosync/corodefs.h>
+#include <corosync/lcr/lcr_comp.h>
+#include <corosync/engine/coroapi.h>
+#include <corosync/list.h>
+#include <corosync/engine/logsys.h>
+
+
+/*
+ * Classification of one resource object as returned by
+ * resource_state_get():
+ *   GOOD          - recovery is "watchdog" and state is "good"
+ *   FAILED        - recovery is "watchdog" and state is "failed"
+ *   UNKNOWN       - recovery is "watchdog" and state is anything else
+ *   NOT_MONITORED - the "recovery" or "state" key is missing, or recovery
+ *                   is not "watchdog"
+ */
+typedef enum {
+       WD_RESOURCE_GOOD,
+       WD_RESOURCE_FAILED,
+       WD_RESOURCE_UNKNOWN,
+       WD_RESOURCE_NOT_MONITORED
+} wd_resource_state_t;
+
+
+LOGSYS_DECLARE_SUBSYS ("WD");
+
+/*
+ * Service Interfaces required by service_message_handler struct
+ */
+static int wd_exec_init_fn (
+       struct corosync_api_v1 *corosync_api);
+static int wd_exec_exit_fn (void);
+
+static void wd_confchg_fn (
+       enum totem_configuration_type configuration_type,
+       const unsigned int *member_list, size_t member_list_entries,
+       const unsigned int *left_list, size_t left_list_entries,
+       const unsigned int *joined_list, size_t joined_list_entries,
+       const struct memb_ring_id *ring_id);
+
+
+static int wd_lib_init_fn (void *conn);
+static int wd_lib_exit_fn (void *conn);
+
+static struct corosync_api_v1 *api;
+static uint32_t wd_timeout = 5;
+static int dog;
+static corosync_timer_handle_t wd_timer;
+
+/*
+ * Service engine descriptor for the watchdog service.  It is handed out by
+ * wd_get_service_engine_ver0().  The service registers no library or
+ * executive message handlers (both engine tables are NULL with a count of 0);
+ * it only hooks start-up, shutdown and configuration changes.
+ */
+struct corosync_service_engine wd_service_engine = {
+       .name                   = "corosync watchdog fencing service",
+       .id                     = WD_SERVICE,
+       .priority               = 1,
+       .private_data_size      = 0,
+       .flow_control           = CS_LIB_FLOW_CONTROL_REQUIRED,
+       .lib_init_fn            = wd_lib_init_fn,
+       .lib_exit_fn            = wd_lib_exit_fn,
+       .lib_engine             = NULL,
+       .lib_engine_count       = 0,
+       .exec_engine            = NULL,
+       .exec_engine_count      = 0,
+       .confchg_fn             = wd_confchg_fn,
+       .exec_init_fn           = wd_exec_init_fn,
+       .exec_exit_fn           = wd_exec_exit_fn,
+       .exec_dump_fn           = NULL,
+       .sync_mode              = CS_SYNC_V2
+};
+
+static DECLARE_LIST_INIT (confchg_notify);
+
+/*
+ * Dynamic loading descriptor
+ */
+
+static struct corosync_service_engine *wd_get_service_engine_ver0 (void);
+
+static struct corosync_service_engine_iface_ver0 wd_service_engine_iface = {
+       .corosync_get_service_engine_ver0       = wd_get_service_engine_ver0
+};
+
+static struct lcr_iface corosync_wd_ver0[1] = {
+       {
+               .name                   = "corosync_wd",
+               .version                = 0,
+               .versions_replace       = 0,
+               .versions_replace_count = 0,
+               .dependencies           = 0,
+               .dependency_count       = 0,
+               .constructor            = NULL,
+               .destructor             = NULL,
+               .interfaces             = NULL,
+       }
+};
+
+static struct lcr_comp wd_comp_ver0 = {
+       .iface_count    = 1,
+       .ifaces         = corosync_wd_ver0
+};
+
+/*
+ * Accessor published through wd_service_engine_iface; returns the address
+ * of the statically allocated wd_service_engine descriptor.
+ */
+static struct corosync_service_engine *wd_get_service_engine_ver0 (void)
+{
+       return (&wd_service_engine);
+}
+
+/*
+ * Attach the service engine interface to the corosync_wd descriptor and
+ * register the component with LCR.  Outside Solaris builds the function is
+ * marked as a constructor; on Solaris it is an exported symbol instead
+ * (presumably invoked by the loader - TODO confirm).
+ */
+#ifdef COROSYNC_SOLARIS
+void corosync_lcr_component_register (void);
+
+void corosync_lcr_component_register (void) {
+#else
+__attribute__ ((constructor))
+static void corosync_lcr_component_register (void) {
+#endif
+       lcr_interfaces_set (&corosync_wd_ver0[0], &wd_service_engine_iface);
+
+       lcr_component_register (&wd_comp_ver0);
+}
+
+/*
+ * Classify a resource object for the watchdog.
+ *
+ * A resource only counts as monitored when it carries both a "recovery" and
+ * a "state" key and recovery equals "watchdog".  For a monitored resource
+ * the state string decides between GOOD, FAILED and UNKNOWN.
+ */
+static wd_resource_state_t resource_state_get (hdb_handle_t resource)
+{
+       char *recovery_str;
+       size_t recovery_len;
+       char *state_str;
+       size_t state_len;
+       objdb_value_types_t value_type;
+
+       /* a missing key means the resource is not ours to watch */
+       if (api->object_key_get_typed (resource, "recovery",
+               (void*)&recovery_str, &recovery_len, &value_type) != 0) {
+               return WD_RESOURCE_NOT_MONITORED;
+       }
+       if (api->object_key_get_typed (resource, "state",
+               (void*)&state_str, &state_len, &value_type) != 0) {
+               return WD_RESOURCE_NOT_MONITORED;
+       }
+
+       if (strcmp (recovery_str, "watchdog") != 0) {
+               return WD_RESOURCE_NOT_MONITORED;
+       }
+
+       if (strcmp (state_str, "failed") == 0) {
+               return WD_RESOURCE_FAILED;
+       }
+       if (strcmp (state_str, "good") == 0) {
+               return WD_RESOURCE_GOOD;
+       }
+       return WD_RESOURCE_UNKNOWN;
+}
+
+
+/*
+ * Copy the name of an objdb object into buf as a NUL-terminated string.
+ * buf is left empty if the name cannot be read.
+ */
+static void wd_object_name_read (hdb_handle_t object,
+       char *buf, size_t buf_size)
+{
+       size_t name_len = 0;
+
+       if (api->object_name_get (object, buf, &name_len) != 0) {
+               name_len = 0;
+       }
+       if (name_len >= buf_size) {
+               name_len = buf_size - 1;
+       }
+       /* terminate at the reported length instead of trusting that
+        * object_name_get() left a terminator behind
+        */
+       buf[name_len] = '\0';
+}
+
+/*
+ * Timer callback: inspect every resource two levels below /resources and
+ * tickle the watchdog only if none of them is in the failed state.
+ *
+ * The callback re-arms itself for another wd_timeout seconds before it
+ * returns.  If no "resources" object exists it returns early, without
+ * tickling the watchdog or re-arming the timer.  The argument is unused.
+ */
+static void wd_tickle_fn (void* arg)
+{
+       hdb_handle_t obj_finder;
+       hdb_handle_t obj_finder2;
+       hdb_handle_t resources;
+       hdb_handle_t resource_type;
+       hdb_handle_t resource;
+       int res;
+       char object_name[128];
+       char object_name2[128];
+       int all_ok = 1;
+       wd_resource_state_t state;
+
+       api->object_find_create (
+               OBJECT_PARENT_HANDLE,
+               "resources", strlen ("resources"),
+               &obj_finder);
+
+       res = api->object_find_next (obj_finder, &resources);
+       api->object_find_destroy (obj_finder);
+       if (res != 0) {
+               return;
+       }
+
+       /* this will be the system or processes level
+        */
+       api->object_find_create (
+               resources,
+               NULL, 0,
+               &obj_finder);
+       while (api->object_find_next (obj_finder,
+                       &resource_type) == 0) {
+               wd_object_name_read (resource_type,
+                       object_name, sizeof (object_name));
+
+               api->object_find_create (
+                       resource_type,
+                       NULL, 0,
+                       &obj_finder2);
+
+               while (api->object_find_next (obj_finder2,
+                               &resource) == 0) {
+
+                       wd_object_name_read (resource,
+                               object_name2, sizeof (object_name2));
+
+                       state = resource_state_get (resource);
+                       if (state == WD_RESOURCE_FAILED) {
+                               all_ok = 0;
+                               log_printf (LOGSYS_LEVEL_CRIT,
+                                       "/resources/%s/%s failed!",
+                                       object_name, object_name2);
+                       }
+                       else if (state == WD_RESOURCE_GOOD) {
+                               log_printf (LOGSYS_LEVEL_INFO,
+                                       "/resources/%s/%s good.",
+                                       object_name, object_name2);
+                       }
+                       else {
+                               log_printf (LOGSYS_LEVEL_INFO,
+                                       "/resources/%s/%s not monitored.",
+                                       object_name, object_name2);
+                       }
+               }
+               api->object_find_destroy (obj_finder2);
+       }
+       api->object_find_destroy (obj_finder);
+
+       if (all_ok) {
+               /* tickle; the ioctl argument is ignored by the driver */
+               if (ioctl(dog, WDIOC_KEEPALIVE, 0) != 0) {
+                       log_printf (LOGSYS_LEVEL_WARNING,
+                               "Unable to tickle the watchdog.");
+               }
+
+               log_printf (LOGSYS_LEVEL_INFO,
+                       "all watchdog'ed resources are good.");
+       }
+       else {
+               log_printf (LOGSYS_LEVEL_ALERT,
+                       "all watchdog'ed resources are NOT good, "
+                       "NOT tickling the watchdog!");
+       }
+
+       api->timer_add_duration((unsigned long long)wd_timeout*1000000000, NULL,
+                               wd_tickle_fn, &wd_timer);
+}
+
+/*
+ * Count the resources two levels below /resources that resource_state_get()
+ * classifies as anything other than WD_RESOURCE_NOT_MONITORED.
+ *
+ * Returns 0 when there is no "resources" object at all.
+ */
+static int num_resources_need_watchdog(void)
+{
+       hdb_handle_t obj_finder;
+       hdb_handle_t obj_finder2;
+       hdb_handle_t resources;
+       hdb_handle_t resource_type;
+       hdb_handle_t resource;
+       int res;
+       int number = 0;
+
+       api->object_find_create (
+               OBJECT_PARENT_HANDLE,
+               "resources", strlen ("resources"),
+               &obj_finder);
+
+       res = api->object_find_next (obj_finder, &resources);
+       api->object_find_destroy (obj_finder);
+       if (res != 0) {
+               return number;
+       }
+
+       /* this will be the system or processes level
+        */
+       api->object_find_create (
+               resources,
+               NULL, 0,
+               &obj_finder);
+       while (api->object_find_next (obj_finder,
+                       &resource_type) == 0) {
+
+               api->object_find_create (
+                       resource_type,
+                       NULL, 0,
+                       &obj_finder2);
+
+               while (api->object_find_next (obj_finder2,
+                               &resource) == 0) {
+                       if (resource_state_get (resource) !=
+                               WD_RESOURCE_NOT_MONITORED) {
+                               number++;
+                       }
+               }
+               api->object_find_destroy (obj_finder2);
+       }
+       api->object_find_destroy (obj_finder);
+
+       return number;
+}
+
+/*
+ * Open /dev/watchdog and derive the tickle interval from its timeout.
+ *
+ * On success the descriptor is stored in dog and wd_timeout holds half of
+ * the timeout reported by the driver, but never less than one second.  If
+ * the driver does not report a timeout, the value wd_timeout already had is
+ * halved instead.
+ *
+ * Returns 0 on success, -1 if the device is missing, not writable or cannot
+ * be opened.
+ */
+static int setup_watchdog(void)
+{
+       struct watchdog_info ident;
+       int timeout_secs = 0;
+       int enable_card = WDIOS_ENABLECARD;
+
+       if (access ("/dev/watchdog", W_OK) != 0) {
+               log_printf (LOGSYS_LEVEL_WARNING,
+                       "No Watchdog, try modprobe <a watchdog>");
+               return -1;
+       }
+
+       /* here goes, lets hope they have "Magic Close"
+        */
+       dog = open("/dev/watchdog", O_WRONLY);
+
+       if (dog == -1) {
+               log_printf (LOGSYS_LEVEL_WARNING,
+                       "Watchdog exists but couldn't be opened.");
+               return -1;
+       }
+
+       /* Right we have the dog.
+        * Lets see what breed it is.
+        */
+       log_printf (LOGSYS_LEVEL_INFO,
+               "Watchdog is now been tickled by corosync.");
+
+       /* ident is only meaningful if the driver answered the query */
+       if (ioctl(dog, WDIOC_GETSUPPORT, &ident) == 0) {
+               log_printf (LOGSYS_LEVEL_DEBUG, "%s",
+                       (const char *)ident.identity);
+
+               if (ident.options & WDIOF_SETTIMEOUT) {
+                       /* yay! the dog is trained.
+                        */
+                       /* TODO set this up from a config option */
+               }
+       }
+
+       /* the ioctl fills in an int */
+       if (ioctl(dog, WDIOC_GETTIMEOUT, &timeout_secs) == 0 &&
+               timeout_secs > 0) {
+               wd_timeout = timeout_secs;
+       }
+       log_printf (LOGSYS_LEVEL_DEBUG, "The timeout is %d seconds\n",
+               (int)wd_timeout);
+
+       /* a 1 second timeout would halve to 0 and make the tickle timer
+        * fire back to back
+        */
+       wd_timeout = wd_timeout / 2;
+       if (wd_timeout == 0) {
+               wd_timeout = 1;
+       }
+
+       /* WDIOC_SETOPTIONS takes a pointer to the option flags.  Failure is
+        * not treated as fatal here.
+        */
+       if (ioctl(dog, WDIOC_SETOPTIONS, &enable_card) != 0) {
+               log_printf (LOGSYS_LEVEL_DEBUG,
+                       "Watchdog driver did not accept WDIOS_ENABLECARD\n");
+       }
+       return 0;
+}
+
+/*
+ * Service start-up hook (the .exec_init_fn of wd_service_engine).
+ *
+ * Opens the watchdog and arms the first tickle timer.  Returns -1 without
+ * touching the device when no resource needs the watchdog, -1 when the
+ * device cannot be set up, and 0 otherwise.
+ */
+static int wd_exec_init_fn (
+       struct corosync_api_v1 *corosync_api)
+{
+       unsigned long long tickle_interval_ns;
+
+#ifdef COROSYNC_SOLARIS
+       logsys_subsys_init();
+#endif
+       log_printf (LOGSYS_LEVEL_INFO, "%s\n", __func__);
+       api = corosync_api;
+
+       /* setup_watchdog() is only reached when something needs the dog;
+        * it also sets wd_timeout used below
+        */
+       if (num_resources_need_watchdog() == 0 || setup_watchdog() != 0) {
+               return -1;
+       }
+
+       tickle_interval_ns = (unsigned long long)wd_timeout * 1000000000;
+       api->timer_add_duration(tickle_interval_ns, NULL,
+                               wd_tickle_fn, &wd_timer);
+
+       return 0;
+}
+
+/*
+ * Service shutdown hook (the .exec_exit_fn of wd_service_engine).
+ *
+ * If the watchdog device was opened, stops the tickle timer, then writes
+ * the magic character 'V' and closes the device.  The write only has its
+ * effect when the close follows it.  Always returns 0.
+ */
+static int wd_exec_exit_fn (void)
+{
+       char magic = 'V';
+
+       if (dog > 0) {
+               log_printf (LOGSYS_LEVEL_INFO, "%s: magic close.\n", __func__);
+
+               /* a timer is pending whenever the device is open; it must
+                * not fire once the descriptor is gone
+                */
+               api->timer_delete (wd_timer);
+
+               if (write (dog, &magic, 1) != 1) {
+                       log_printf (LOGSYS_LEVEL_WARNING,
+                               "%s: magic write failed, the watchdog may stay armed.\n",
+                               __func__);
+               }
+               close (dog);
+               dog = -1;
+       }
+       return 0;
+}
+
+/*
+ * Membership configuration-change callback (the .confchg_fn of
+ * wd_service_engine).  Currently does nothing with the change.
+ */
+static void wd_confchg_fn (
+       enum totem_configuration_type configuration_type,
+       const unsigned int *member_list, size_t member_list_entries,
+       const unsigned int *left_list, size_t left_list_entries,
+       const unsigned int *joined_list, size_t joined_list_entries,
+       const struct memb_ring_id *ring_id)
+{
+}
+
+/*
+ * Library connection set-up hook (the .lib_init_fn of wd_service_engine).
+ * Keeps no per-connection state and always returns 0.
+ */
+static int wd_lib_init_fn (void *conn)
+{
+       return (0);
+}
+
+/*
+ * Library connection tear-down hook (the .lib_exit_fn of wd_service_engine).
+ * Has nothing to release and always returns 0.
+ */
+static int wd_lib_exit_fn (void *conn)
+{
+       return (0);
+}
+
-- 
1.6.6.1


_______________________________________________
Openais mailing list
[email protected]
https://lists.linux-foundation.org/mailman/listinfo/openais

Reply via email to