This patch looks good, but it calls some poll APIs directly even though the equivalent functionality is available via coroapi.
Regards -steve On 09/23/2010 06:55 PM, Angus Salkeld wrote: > Signed-off-by: Angus Salkeld<[email protected]> > --- > configure.ac | 36 ++ > corosync.spec.in | 14 + > exec/Makefile.am | 2 +- > exec/service.c | 12 + > include/corosync/corodefs.h | 4 +- > services/Makefile.am | 6 + > services/mon.c | 635 ++++++++++++++++++++++++++++++++++++ > services/wd.c | 755 > +++++++++++++++++++++++++++++++++++++++++++ > 8 files changed, 1462 insertions(+), 2 deletions(-) > create mode 100644 services/mon.c > create mode 100644 services/wd.c > > diff --git a/configure.ac b/configure.ac > index b57fdd2..ad4b6c1 100644 > --- a/configure.ac > +++ b/configure.ac > @@ -252,6 +252,16 @@ AC_ARG_ENABLE([rdma], > [ enable_rdma="no" ]) > AM_CONDITIONAL(BUILD_RDMA, test x$enable_rdma = xyes) > > +AC_ARG_ENABLE([monitoring], > + [ --enable-monitoring : resource monitoring ],, > + [ default="no" ]) > +AM_CONDITIONAL(BUILD_MONITORING, test x$enable_monitoring = xyes) > + > +AC_ARG_ENABLE([watchdog], > + [ --enable-watchdog : Watchdog support ],, > + [ edefault="no" ]) > +AM_CONDITIONAL(BUILD_WATCHDOG, test x$enable_watchdog = xyes) > + > AC_ARG_ENABLE([augeas], > [ --enable-augeas : Install the augeas lens for > corosync.conf ],, > [ enable_augeas="no" ]) > @@ -277,6 +287,7 @@ AC_ARG_WITH([socket-dir], > # THIS SECTION MUST DIE! 
> CP=cp > OS_LDL="-ldl" > +have_linux="no" > case "$host_os" in > *linux*) > AC_DEFINE_UNQUOTED([COROSYNC_LINUX], [1], > @@ -286,6 +297,7 @@ case "$host_os" in > OS_LDFLAGS="" > OS_DYFLAGS="-rdynamic" > DARWIN_OPTS="" > + have_linux="yes" > ;; > darwin*) > AC_DEFINE_UNQUOTED([COROSYNC_DARWIN], [1], > @@ -387,6 +399,30 @@ if test "x${enable_rdma}" = xyes; then > PACKAGE_FEATURES="$PACKAGE_FEATURES rdma" > fi > > +if test "x${enable_monitoring}" = xyes; then > + > + AC_CHECK_LIB([statgrab], [sg_get_mem_stats], have_libstatgrab="yes", > have_libstatgrab="no") > + > + if test "x${have_libstatgrab}" = xyes; then > + AC_DEFINE_UNQUOTED([HAVE_LIBSTATGRAB], 1, [have libstatgrab]) > + statgrab_LIBS="-lstatgrab" > + else > + if test "x${have_linux}" = xno; then > + AC_MSG_ERROR(monitoring requires libstatgrab on > non-linux systems) > + fi > + fi > + AC_SUBST([statgrab_LIBS]) > + AC_DEFINE_UNQUOTED([HAVE_MONITORING], 1, [have resource monitoring]) > + PACKAGE_FEATURES="$PACKAGE_FEATURES monitoring" > +fi > + > +if test "x${enable_watchdog}" = xyes; then > + AC_CHECK_HEADER(linux/watchdog.h,,AC_MSG_ERROR(watchdog requires > linux/watchdog.h)) > + AC_CHECK_HEADER(linux/reboot.h,,AC_MSG_ERROR(watchdog requires > linux/reboot.h)) > + AC_DEFINE_UNQUOTED([HAVE_WATCHDOG], 1, [have watchdog]) > + PACKAGE_FEATURES="$PACKAGE_FEATURES watchdog" > +fi > + > if test "x${enable_augeas}" = xyes; then > PACKAGE_FEATURES="$PACKAGE_FEATURES augeas" > fi > diff --git a/corosync.spec.in b/corosync.spec.in > index dafdb3c..ed531c3 100644 > --- a/corosync.spec.in > +++ b/corosync.spec.in > @@ -5,6 +5,8 @@ > # Invoke "rpmbuild --without<feature>" or "rpmbuild --with<feature>" > # to disable or enable specific features > %bcond_with testagents > +%bcond_with watchdog > +%bcond_with monitoring > > Name: corosync > Summary: The Corosync Cluster Engine and Application Programming Interfaces > @@ -52,6 +54,12 @@ export rdmacm_LIBS=-lrdmacm \ > %if %{with testagents} > --enable-testagents \ > %endif > 
+%if %{with watchdog} > + --enable-watchdog \ > +%endif > +%if %{with monitoring} > + --enable-monitoring \ > +%endif > --enable-rdma \ > --with-initddir=%{_initrddir} > > @@ -115,6 +123,12 @@ fi > %{_libexecdir}/lcrso/quorum_testquorum.lcrso > %{_libexecdir}/lcrso/vsf_quorum.lcrso > %{_libexecdir}/lcrso/vsf_ykd.lcrso > +%if %{with watchdog} > +%{_libexecdir}/lcrso/service_wd.lcrso > +%endif > +%if %{with monitoring} > +%{_libexecdir}/lcrso/service_mon.lcrso > +%endif > %dir %{_localstatedir}/lib/corosync > %dir %{_localstatedir}/log/cluster > %{_mandir}/man8/corosync_overview.8* > diff --git a/exec/Makefile.am b/exec/Makefile.am > index a3a49bf..938237c 100644 > --- a/exec/Makefile.am > +++ b/exec/Makefile.am > @@ -59,7 +59,7 @@ libcoroipcs_a_SOURCES = $(COROIPCS_SRC) > corosync_SOURCES = main.c util.c sync.c apidef.c service.c \ > timer.c totemconfig.c mainconfig.c quorum.c > schedwrk.c \ > ../lcr/lcr_ifact.c evil.c syncv2.c > -corosync_LDADD = -ltotem_pg -llogsys -lcoroipcs > +corosync_LDADD = -ltotem_pg -llogsys -lcoroipcs > $(statgrab_LIBS) > corosync_DEPENDENCIES = libtotem_pg.so.$(SONAME) > liblogsys.so.$(SONAME) libcoroipcs.so.$(SONAME) > corosync_LDFLAGS = $(OS_DYFLAGS) -L./ > > diff --git a/exec/service.c b/exec/service.c > index be55459..dc30406 100644 > --- a/exec/service.c > +++ b/exec/service.c > @@ -85,6 +85,18 @@ static struct default_service default_services[] = { > .name = "corosync_pload", > .ver = 0, > }, > +#ifdef HAVE_MONITORING > + { > + .name = "corosync_mon", > + .ver = 0, > + }, > +#endif > +#ifdef HAVE_WATCHDOG > + { > + .name = "corosync_wd", > + .ver = 0, > + }, > +#endif > { > .name = "corosync_quorum", > .ver = 0, > diff --git a/include/corosync/corodefs.h b/include/corosync/corodefs.h > index 57923e2..a1e6539 100644 > --- a/include/corosync/corodefs.h > +++ b/include/corosync/corodefs.h > @@ -59,7 +59,9 @@ enum corosync_service_types { > NTF_SERVICE = 16, > AMF_V2_SERVICE = 17, > TST_SV1_SERVICE = 18, > - TST_SV2_SERVICE = 19 > + 
TST_SV2_SERVICE = 19, > + MON_SERVICE = 20, > + WD_SERVICE = 21 > }; > > #ifdef HAVE_SMALL_MEMORY_FOOTPRINT > diff --git a/services/Makefile.am b/services/Makefile.am > index cb64016..f39adc3 100644 > --- a/services/Makefile.am > +++ b/services/Makefile.am > @@ -38,6 +38,12 @@ INCLUDES = -I$(top_builddir)/include > -I$(top_srcdir)/include \ > -I$(top_srcdir)/include/corosync > > SERVICE_LCRSO = evs cfg cpg confdb pload > +if BUILD_WATCHDOG > +SERVICE_LCRSO += wd > +endif > +if BUILD_MONITORING > +SERVICE_LCRSO += mon > +endif > > QUORUM_LCRSO = votequorum testquorum > > diff --git a/services/mon.c b/services/mon.c > new file mode 100644 > index 0000000..3e475a1 > --- /dev/null > +++ b/services/mon.c > @@ -0,0 +1,635 @@ > +/* > + * Copyright (c) 2010 Red Hat, Inc. > + * > + * All rights reserved. > + * > + * Author: Angus Salkeld<[email protected]> > + * > + * This software licensed under BSD license, the text of which follows: > + * > + * Redistribution and use in source and binary forms, with or without > + * modification, are permitted provided that the following conditions are > met: > + * > + * - Redistributions of source code must retain the above copyright notice, > + * this list of conditions and the following disclaimer. > + * - Redistributions in binary form must reproduce the above copyright > notice, > + * this list of conditions and the following disclaimer in the > documentation > + * and/or other materials provided with the distribution. > + * - Neither the name of the MontaVista Software, Inc. nor the names of its > + * contributors may be used to endorse or promote products derived from > this > + * software without specific prior written permission. > + * > + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS > IS" > + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE > + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE > + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE > + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR > + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF > + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS > + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN > + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) > + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF > + * THE POSSIBILITY OF SUCH DAMAGE. > + */ > + > +#include<config.h> > + > +#include<unistd.h> > +#if defined(HAVE_LIBSTATGRAB) > +#include<statgrab.h> > +#endif > + > +#include<corosync/corotypes.h> > +#include<corosync/corodefs.h> > +#include<corosync/lcr/lcr_comp.h> > +#include<corosync/engine/coroapi.h> > +#include<corosync/list.h> > +#include<corosync/totem/coropoll.h> > +#include<corosync/engine/logsys.h> > +#include "../exec/fsm.h" > + > + > +LOGSYS_DECLARE_SUBSYS ("MON"); > + > +#undef ENTER > +#define ENTER() log_printf (LOGSYS_LEVEL_INFO, "%s", __func__) > + > +/* > + * Service Interfaces required by service_message_handler struct > + */ > +static int mon_exec_init_fn ( > + struct corosync_api_v1 *corosync_api); > + > +hdb_handle_t mon_poll = 0; > +static struct corosync_api_v1 *api; > +static hdb_handle_t resources_obj; > +static pthread_t mon_poll_thread; > +#define MON_DEFAULT_PERIOD 3 > + > +struct corosync_service_engine mon_service_engine = { > + .name = "corosync resource monitoring service", > + .id = MON_SERVICE, > + .priority = 1, > + .private_data_size = 0, > + .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED, > + .lib_init_fn = NULL, > + .lib_exit_fn = NULL, > + .lib_engine = NULL, > + .lib_engine_count = 0, > + .exec_engine = NULL, > + .exec_engine_count = 0, > + .confchg_fn = NULL, > + .exec_init_fn = mon_exec_init_fn, > + .exec_dump_fn = NULL, > + .sync_mode = CS_SYNC_V2 > +}; > + > +static DECLARE_LIST_INIT (confchg_notify); > + 
> + > +struct resource_instance { > + hdb_handle_t handle; > + const char *name; > + poll_timer_handle timer_handle; > + void (*update_stats_fn) (void *data); > + struct cs_fsm fsm; > + int32_t period; > + objdb_value_types_t max_type; > + union { > + int32_t int32; > + double dbl; > + } max; > +}; > + > +static void mem_update_stats_fn (void *data); > +static void load_update_stats_fn (void *data); > + > +static struct resource_instance memory_used_inst = { > + .name = "memory_used", > + .update_stats_fn = mem_update_stats_fn, > + .max_type = OBJDB_VALUETYPE_INT32, > + .max.int32 = INT32_MAX, > + .period = MON_DEFAULT_PERIOD, > +}; > + > +static struct resource_instance load_15min_inst = { > + .name = "load_15min", > + .update_stats_fn = load_update_stats_fn, > + .max_type = OBJDB_VALUETYPE_DOUBLE, > + .max.dbl = INT32_MAX, > + .period = MON_DEFAULT_PERIOD, > +}; > + > + > +/* > + * F S M > + */ > +static void mon_config_changed (struct cs_fsm* fsm, int32_t event, void * > data); > +static void mon_resource_failed (struct cs_fsm* fsm, int32_t event, void * > data); > + > +const char * mon_ok_str = "ok"; > +const char * mon_failed_str = "failed"; > +const char * mon_failure_str = "failure"; > +const char * mon_disabled_str = "disabled"; > +const char * mon_config_changed_str = "config_changed"; > + > +enum mon_resource_state { > + MON_S_DISABLED, > + MON_S_OK, > + MON_S_FAILED > +}; > +enum mon_resource_event { > + MON_E_CONFIG_CHANGED, > + MON_E_FAILURE > +}; > + > +struct cs_fsm_entry mon_fsm_table[] = { > + { MON_S_DISABLED, MON_E_CONFIG_CHANGED, mon_config_changed, > {MON_S_DISABLED, MON_S_OK, -1} }, > + { MON_S_DISABLED, MON_E_FAILURE, NULL, > {-1} }, > + { MON_S_OK, MON_E_CONFIG_CHANGED, mon_config_changed, > {MON_S_OK, MON_S_DISABLED, -1} }, > + { MON_S_OK, MON_E_FAILURE, mon_resource_failed, > {MON_S_FAILED, -1} }, > + { MON_S_FAILED, MON_E_CONFIG_CHANGED, mon_config_changed, > {MON_S_OK, MON_S_DISABLED, -1} }, > + { MON_S_FAILED, MON_E_FAILURE, NULL, > 
{-1} }, > +}; > + > +/* > + * Dynamic loading descriptor > + */ > + > +static struct corosync_service_engine *mon_get_service_engine_ver0 (void); > + > +static struct corosync_service_engine_iface_ver0 mon_service_engine_iface = { > + .corosync_get_service_engine_ver0 = mon_get_service_engine_ver0 > +}; > + > +static struct lcr_iface corosync_mon_ver0[1] = { > + { > + .name = "corosync_mon", > + .version = 0, > + .versions_replace = 0, > + .versions_replace_count = 0, > + .dependencies = 0, > + .dependency_count = 0, > + .constructor = NULL, > + .destructor = NULL, > + .interfaces = NULL, > + } > +}; > + > +static struct lcr_comp mon_comp_ver0 = { > + .iface_count = 1, > + .ifaces = corosync_mon_ver0 > +}; > + > +static struct corosync_service_engine *mon_get_service_engine_ver0 (void) > +{ > + return (&mon_service_engine); > +} > + > +#ifdef COROSYNC_SOLARIS > +void corosync_lcr_component_register (void); > + > +void corosync_lcr_component_register (void) { > +#else > +__attribute__ ((constructor)) static void corosync_lcr_component_register > (void) { > +#endif > + lcr_interfaces_set (&corosync_mon_ver0[0],&mon_service_engine_iface); > + > + lcr_component_register (&mon_comp_ver0); > +} > + > +static const char * mon_res_state_to_str(struct cs_fsm* fsm, > + int32_t state) > +{ > + switch (state) { > + case MON_S_DISABLED: > + return mon_disabled_str; > + break; > + case MON_S_OK: > + return mon_ok_str; > + break; > + case MON_S_FAILED: > + return mon_failed_str; > + break; > + } > + return NULL; > +} > + > +static const char * mon_res_event_to_str(struct cs_fsm* fsm, > + int32_t event) > +{ > + switch (event) { > + case MON_E_CONFIG_CHANGED: > + return mon_config_changed_str; > + break; > + case MON_E_FAILURE: > + return mon_failure_str; > + break; > + } > + return NULL; > +} > + > +static void mon_fsm_state_set (struct cs_fsm* fsm, > + enum mon_resource_state next_state, struct resource_instance* inst) > +{ > + enum mon_resource_state prev_state = 
fsm->curr_state; > + const char *state_str; > + > + ENTER(); > + > + cs_fsm_state_set(fsm, next_state, inst); > + > + if (prev_state == fsm->curr_state) { > + return; > + } > + state_str = mon_res_state_to_str(fsm, fsm->curr_state); > + > + api->object_key_replace (inst->handle, > + "state", strlen ("state"), > + state_str, strlen (state_str)); > +} > + > + > +static void mon_config_changed (struct cs_fsm* fsm, int32_t event, void * > data) > +{ > + struct resource_instance * inst = (struct resource_instance *)data; > + char *str; > + size_t str_len; > + objdb_value_types_t type; > + int32_t tmp_value; > + int32_t res; > + > + ENTER(); > + > + res = api->object_key_get_typed (inst->handle, > + "poll_period", > + (void**)&str,&str_len, > + &type); > + if (res == 0) { > + tmp_value = strtol (str, NULL, 0); > + if (tmp_value> 0&& tmp_value< 120) { > + if (inst->period != tmp_value) { > + inst->period = tmp_value; > + } > + } > + } > + > + res = api->object_key_get_typed (inst->handle, "max", > + (void**)&str,&str_len,&type); > + if (res != 0) { > + if (inst->max_type == OBJDB_VALUETYPE_INT32) { > + inst->max.int32 = INT32_MAX; > + } else > + if (inst->max_type == OBJDB_VALUETYPE_DOUBLE) { > + inst->max.dbl = INT32_MAX; > + } > + mon_fsm_state_set (fsm, MON_S_DISABLED, inst); > + } else { > + if (inst->max_type == OBJDB_VALUETYPE_INT32) { > + inst->max.int32 = strtol (str, NULL, 0); > + } else > + if (inst->max_type == OBJDB_VALUETYPE_DOUBLE) { > + inst->max.dbl = strtod (str, NULL); > + } > + mon_fsm_state_set (fsm, MON_S_OK, inst); > + } > + > + if (mon_poll == 0) { > + return; > + } > + poll_timer_delete (mon_poll, inst->timer_handle); > + /* > + * run the updater, incase the period has shortened > + */ > + inst->update_stats_fn (inst); > + poll_timer_add (mon_poll, > + inst->period * 1000, NULL, > + inst->update_stats_fn, > + &inst->timer_handle); > +} > + > +void mon_resource_failed (struct cs_fsm* fsm, int32_t event, void * data) > +{ > + struct resource_instance 
* inst = (struct resource_instance *)data; > + ENTER(); > + mon_fsm_state_set (fsm, MON_S_FAILED, inst); > +} > + > +static int32_t percent_mem_used_get(void) > +{ > +#if defined(HAVE_LIBSTATGRAB) > + sg_mem_stats *mem_stats; > + sg_swap_stats *swap_stats; > + long long total, freemem; > + > + mem_stats = sg_get_mem_stats(); > + swap_stats = sg_get_swap_stats(); > + > + if (mem_stats == NULL || swap_stats != NULL) { > + log_printf (LOGSYS_LEVEL_ERROR, "Unable to get memory stats: > %s\n", > + sg_str_error(sg_get_error())); > + return -1; > + } > + total = mem_stats->total + swap_stats->total; > + freemem = mem_stats->free + swap_stats->free; > + return ((total - freemem) * 100) / total; > +#else > +#if defined(COROSYNC_LINUX) > + char *line_ptr; > + char line[512]; > + unsigned long long value; > + FILE *f; > + long long total = 0; > + long long freemem = 0; > + > + if ((f = fopen("/proc/meminfo", "r")) == NULL) { > + return -1; > + } > + > + while ((line_ptr = fgets(line, sizeof(line), f)) != NULL) { > + if (sscanf(line_ptr, "%*s %llu kB",&value) != 1) { > + continue; > + } > + value *= 1024; > + > + if (strncmp(line_ptr, "MemTotal:", 9) == 0) { > + total += value; > + } else if (strncmp(line_ptr, "MemFree:", 8) == 0) { > + freemem += value; > + } else if (strncmp(line_ptr, "SwapTotal:", 10) == 0) { > + total += value; > + } else if (strncmp(line_ptr, "SwapFree:", 9) == 0) { > + freemem += value; > + } > + } > + > + fclose(f); > + return ((total - freemem) * 100) / total; > +#else > +#error need libstatgrab or linux. 
> +#endif /* COROSYNC_LINUX */ > +#endif /* HAVE_LIBSTATGRAB */ > +} > + > + > +static void mem_update_stats_fn (void *data) > +{ > + struct resource_instance * inst = (struct resource_instance *)data; > + int32_t new_value; > + uint64_t timestamp; > + > + new_value = percent_mem_used_get(); > + if (new_value> 0) { > + api->object_key_replace (inst->handle, > + "current", strlen("current"), > + &new_value, sizeof(new_value)); > + > + timestamp = time (NULL); > + > + api->object_key_replace (inst->handle, > + "last_updated", strlen("last_updated"), > + &timestamp, sizeof(time_t)); > + > + if (new_value> inst->max.int32) { > + cs_fsm_process (&inst->fsm, MON_E_FAILURE, inst); > + } > + } > + poll_timer_add (mon_poll, > + inst->period * 1000, inst, > + inst->update_stats_fn, > + &inst->timer_handle); > +} > + > +static double min15_loadavg_get(void) > +{ > +#if defined(HAVE_LIBSTATGRAB) > + sg_load_stats *load_stats; > + load_stats = sg_get_load_stats (); > + if (load_stats == NULL) { > + log_printf (LOGSYS_LEVEL_ERROR, "Unable to get load stats: > %s\n", > + sg_str_error (sg_get_error())); > + return -1; > + } > + return load_stats->min15; > +#else > +#if defined(COROSYNC_LINUX) > + double loadav[3]; > + if (getloadavg(loadav,3)< 0) { > + return -1; > + } > + return loadav[2]; > +#else > +#error need libstatgrab or linux. 
> +#endif /* COROSYNC_LINUX */ > +#endif /* HAVE_LIBSTATGRAB */ > +} > + > +static void load_update_stats_fn (void *data) > +{ > + struct resource_instance * inst = (struct resource_instance *)data; > + uint64_t timestamp; > + int32_t res = 0; > + double min15 = min15_loadavg_get(); > + > + if (min15< 0) { > + } > + res = api->object_key_replace (inst->handle, > + "current", strlen("current"), > + &min15, sizeof (min15)); > + if (res != 0) > + log_printf (LOGSYS_LEVEL_ERROR, "replace current failed: %d", > res); > + > + timestamp = cs_timestamp_get(); > + > + res = api->object_key_replace (inst->handle, > + "last_updated", strlen("last_updated"), > + &timestamp, sizeof(uint64_t)); > + if (res != 0) > + log_printf (LOGSYS_LEVEL_ERROR, "replace last_updated failed: > %d", res); > + > + if (min15> inst->max.dbl) { > + cs_fsm_process (&inst->fsm, MON_E_FAILURE,&inst); > + } > + > + poll_timer_add (mon_poll, > + inst->period * 1000, inst, > + inst->update_stats_fn, > + &inst->timer_handle); > +} > + > +static void *mon_thread_handler (void * unused) > +{ > +#ifdef HAVE_LIBSTATGRAB > + sg_init(); > +#endif /* HAVE_LIBSTATGRAB */ > + mon_poll = poll_create (); > + > + poll_timer_add (mon_poll, > + memory_used_inst.period * 1000, > + &memory_used_inst, > + memory_used_inst.update_stats_fn, > + &memory_used_inst.timer_handle); > + > + poll_timer_add (mon_poll, > + load_15min_inst.period * 1000, > + &load_15min_inst, > + load_15min_inst.update_stats_fn, > + &load_15min_inst.timer_handle); > + poll_run (mon_poll); > + > + return NULL; > +} > + > +static int object_find_or_create ( > + hdb_handle_t parent_object_handle, > + hdb_handle_t *object_handle, > + const void *object_name, > + size_t object_name_len) > +{ > + hdb_handle_t obj_finder; > + hdb_handle_t obj; > + int ret = -1; > + > + api->object_find_create ( > + parent_object_handle, > + object_name, > + object_name_len, > + &obj_finder); > + > + if (api->object_find_next (obj_finder,&obj) == 0) { > + /* found it */ > + 
*object_handle = obj; > + ret = 0; > + } > + else { > + ret = api->object_create (parent_object_handle, > + object_handle, > + object_name, object_name_len); > + } > + > + api->object_find_destroy (obj_finder); > + return ret; > +} > + > +static void mon_key_change_notify (object_change_type_t change_type, > + hdb_handle_t parent_object_handle, > + hdb_handle_t object_handle, > + const void *object_name_pt, size_t object_name_len, > + const void *key_name_pt, size_t key_len, > + const void *key_value_pt, size_t key_value_len, > + void *priv_data_pt) > +{ > + struct resource_instance* inst = (struct > resource_instance*)priv_data_pt; > + > + if ((strcmp ((char*)key_name_pt, "max") == 0) || > + (strcmp ((char*)key_name_pt, "poll_period") == 0)) { > + ENTER(); > + cs_fsm_process (&inst->fsm, MON_E_CONFIG_CHANGED, inst); > + } > +} > + > +static void mon_instance_init (hdb_handle_t parent, struct > resource_instance* inst) > +{ > + int32_t res; > + char mon_period_str[32]; > + size_t mon_period_len; > + objdb_value_types_t mon_period_type; > + int32_t tmp_value; > + int32_t zero_32 = 0; > + time_t zero_64 = 0; > + double zero_double = 0; > + > + ENTER(); > + > + object_find_or_create (parent, > + &inst->handle, > + inst->name, strlen (inst->name)); > + > + if (inst->max_type == OBJDB_VALUETYPE_INT32) { > + api->object_key_create_typed (inst->handle, > + "current",&zero_32, > + sizeof (zero_32), inst->max_type); > + } else { > + api->object_key_create_typed (inst->handle, > + "current",&zero_double, > + sizeof (zero_double), inst->max_type); > + } > + > + api->object_key_create_typed (inst->handle, > + "last_updated",&zero_64, > + sizeof (time_t), OBJDB_VALUETYPE_INT64); > + > + api->object_key_create_typed (inst->handle, > + "state", mon_disabled_str, strlen (mon_disabled_str), > + OBJDB_VALUETYPE_STRING); > + > + inst->fsm.name = inst->name; > + inst->fsm.curr_entry = 0; > + inst->fsm.curr_state = MON_S_DISABLED; > + inst->fsm.table = mon_fsm_table; > + 
inst->fsm.entries = sizeof(mon_fsm_table) / sizeof(struct cs_fsm_entry); > + inst->fsm.state_to_str = mon_res_state_to_str; > + inst->fsm.event_to_str = mon_res_event_to_str; > + > + res = api->object_key_get_typed (inst->handle, > + "poll_period", > + (void**)&mon_period_str,&mon_period_len, > + &mon_period_type); > + if (res != 0) { > + mon_period_len = snprintf (mon_period_str, 32, "%d", > + inst->period); > + api->object_key_create_typed (inst->handle, > + "poll_period",&mon_period_str, > + mon_period_len, > + OBJDB_VALUETYPE_STRING); > + } > + else { > + tmp_value = strtol (mon_period_str, NULL, 0); > + if (tmp_value> 0&& tmp_value< 120) > + inst->period = tmp_value; > + } > + cs_fsm_process (&inst->fsm, MON_E_CONFIG_CHANGED, inst); > + > + poll_timer_add (mon_poll, > + inst->period * 1000, inst, > + inst->update_stats_fn, > + &inst->timer_handle); > + > + api->object_track_start (inst->handle, OBJECT_TRACK_DEPTH_ONE, > + mon_key_change_notify, > + NULL, NULL, NULL, NULL); > + > +} > + > +static int mon_exec_init_fn ( > + struct corosync_api_v1 *corosync_api) > +{ > + hdb_handle_t obj; > + hdb_handle_t parent; > + > +#ifdef COROSYNC_SOLARIS > + logsys_subsys_init(); > +#endif > + api = corosync_api; > + ENTER(); > + > + object_find_or_create (OBJECT_PARENT_HANDLE, > + &resources_obj, > + "resources", strlen ("resources")); > + > + object_find_or_create (resources_obj, > + &obj, > + "system", strlen ("system")); > + > + parent = obj; > + > + mon_instance_init (parent,&memory_used_inst); > + mon_instance_init (parent,&load_15min_inst); > + > + > + pthread_create (&mon_poll_thread, NULL, mon_thread_handler, NULL); > + > + return 0; > +} > + > + > diff --git a/services/wd.c b/services/wd.c > new file mode 100644 > index 0000000..9c9ad97 > --- /dev/null > +++ b/services/wd.c > @@ -0,0 +1,755 @@ > +/* > + * Copyright (c) 2010 Red Hat, Inc. > + * > + * All rights reserved. 
> + * > + * Author: Angus Salkeld<[email protected]> > + * > + * This software licensed under BSD license, the text of which follows: > + * > + * Redistribution and use in source and binary forms, with or without > + * modification, are permitted provided that the following conditions are > met: > + * > + * - Redistributions of source code must retain the above copyright notice, > + * this list of conditions and the following disclaimer. > + * - Redistributions in binary form must reproduce the above copyright > notice, > + * this list of conditions and the following disclaimer in the > documentation > + * and/or other materials provided with the distribution. > + * - Neither the name of the MontaVista Software, Inc. nor the names of its > + * contributors may be used to endorse or promote products derived from > this > + * software without specific prior written permission. > + * > + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS > IS" > + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE > + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE > + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE > + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR > + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF > + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS > + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN > + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) > + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF > + * THE POSSIBILITY OF SUCH DAMAGE. 
> + */ > + > +#include<config.h> > + > +#include<unistd.h> > +#include<fcntl.h> > +#include<sys/ioctl.h> > +#include<linux/types.h> > +#include<linux/watchdog.h> > +#include<linux/reboot.h> > + > +#include<corosync/corotypes.h> > +#include<corosync/corodefs.h> > +#include<corosync/lcr/lcr_comp.h> > +#include<corosync/engine/coroapi.h> > +#include<corosync/list.h> > +#include<corosync/engine/logsys.h> > +#include "../exec/fsm.h" > + > + > +typedef enum { > + WD_RESOURCE_GOOD, > + WD_RESOURCE_FAILED, > + WD_RESOURCE_STATE_UNKNOWN, > + WD_RESOURCE_NOT_MONITORED > +} wd_resource_state_t; > + > +struct resource { > + hdb_handle_t handle; > + char *recovery; > + char name[128]; > + time_t last_updated; > + struct cs_fsm fsm; > + > + corosync_timer_handle_t check_timer; > + uint32_t check_timeout; > +}; > + > +LOGSYS_DECLARE_SUBSYS("WD"); > + > +/* > + * Service Interfaces required by service_message_handler struct > + */ > +static int wd_exec_init_fn ( > + struct corosync_api_v1 *corosync_api); > +static int wd_exec_exit_fn (void); > +static void wd_resource_check_fn (void* resource_ref); > + > +static struct corosync_api_v1 *api; > +#define WD_DEFAULT_TIMEOUT 6 > +static uint32_t watchdog_timeout = WD_DEFAULT_TIMEOUT; > +static uint32_t tickle_timeout = (WD_DEFAULT_TIMEOUT / 2); > +static int dog = -1; > +static corosync_timer_handle_t wd_timer; > +static hdb_handle_t resources_obj; > +static int watchdog_ok = 1; > + > +struct corosync_service_engine wd_service_engine = { > + .name = "corosync self-fencing service", > + .id = WD_SERVICE, > + .priority = 1, > + .private_data_size = 0, > + .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED, > + .lib_init_fn = NULL, > + .lib_exit_fn = NULL, > + .lib_engine = NULL, > + .lib_engine_count = 0, > + .exec_engine = NULL, > + .exec_engine_count = 0, > + .confchg_fn = NULL, > + .exec_init_fn = wd_exec_init_fn, > + .exec_exit_fn = wd_exec_exit_fn, > + .exec_dump_fn = NULL, > + .sync_mode = CS_SYNC_V2 > +}; > + > +static DECLARE_LIST_INIT 
(confchg_notify); > + > +/* > + * F S M > + */ > +static void wd_config_changed (struct cs_fsm* fsm, int32_t event, void * > data); > +static void wd_resource_failed (struct cs_fsm* fsm, int32_t event, void * > data); > + > +enum wd_resource_state { > + WD_S_GOOD, > + WD_S_FAILED, > + WD_S_DISABLED > +}; > + > +enum wd_resource_event { > + WD_E_FAILURE, > + WD_E_CONFIG_CHANGED > +}; > + > +const char * wd_ok_str = "ok"; > +const char * wd_failed_str = "failed"; > +const char * wd_failure_str = "failure"; > +const char * wd_disabled_str = "disabled"; > +const char * wd_config_changed_str = "config_changed"; > + > +struct cs_fsm_entry wd_fsm_table[] = { > + { WD_S_DISABLED, WD_E_CONFIG_CHANGED, wd_config_changed, > {WD_S_DISABLED, WD_S_GOOD, -1} }, > + { WD_S_DISABLED, WD_E_FAILURE, NULL, > {-1} }, > + { WD_S_GOOD, WD_E_CONFIG_CHANGED, wd_config_changed, > {WD_S_GOOD, WD_S_DISABLED, -1} }, > + { WD_S_GOOD, WD_E_FAILURE, wd_resource_failed, > {WD_S_FAILED, -1} }, > + { WD_S_FAILED, WD_E_CONFIG_CHANGED, wd_config_changed, > {WD_S_GOOD, WD_S_DISABLED, -1} }, > + { WD_S_FAILED, WD_E_FAILURE, NULL, > {-1} }, > +}; > + > +/* > + * Dynamic loading descriptor > + */ > + > +static struct corosync_service_engine *wd_get_service_engine_ver0 (void); > + > +static struct corosync_service_engine_iface_ver0 wd_service_engine_iface = { > + .corosync_get_service_engine_ver0 = wd_get_service_engine_ver0 > +}; > + > +static struct lcr_iface corosync_wd_ver0[1] = { > + { > + .name = "corosync_wd", > + .version = 0, > + .versions_replace = 0, > + .versions_replace_count = 0, > + .dependencies = 0, > + .dependency_count = 0, > + .constructor = NULL, > + .destructor = NULL, > + .interfaces = NULL, > + } > +}; > + > +static struct lcr_comp wd_comp_ver0 = { > + .iface_count = 1, > + .ifaces = corosync_wd_ver0 > +}; > + > +static struct corosync_service_engine *wd_get_service_engine_ver0 (void) > +{ > + return (&wd_service_engine); > +} > + > +#ifdef COROSYNC_SOLARIS > +void 
corosync_lcr_component_register (void); > + > +void corosync_lcr_component_register (void) { > +#else > +__attribute__ ((constructor)) static void corosync_lcr_component_register > (void) { > +#endif > + lcr_interfaces_set (&corosync_wd_ver0[0],&wd_service_engine_iface); > + > + lcr_component_register (&wd_comp_ver0); > +} > + > +static int object_find_or_create ( > + hdb_handle_t parent_object_handle, > + hdb_handle_t *object_handle, > + const void *object_name, > + size_t object_name_len) > +{ > + hdb_handle_t obj_finder; > + hdb_handle_t obj; > + int ret = -1; > + > + api->object_find_create ( > + parent_object_handle, > + object_name, > + object_name_len, > + &obj_finder); > + > + if (api->object_find_next (obj_finder,&obj) == 0) { > + /* found it */ > + *object_handle = obj; > + ret = 0; > + } > + else { > + ret = api->object_create (parent_object_handle, > + object_handle, > + object_name, object_name_len); > + } > + > + api->object_find_destroy (obj_finder); > + return ret; > +} > + > +static const char * wd_res_state_to_str(struct cs_fsm* fsm, > + int32_t state) > +{ > + switch (state) { > + case WD_S_DISABLED: > + return wd_disabled_str; > + break; > + case WD_S_GOOD: > + return wd_ok_str; > + break; > + case WD_S_FAILED: > + return wd_failed_str; > + break; > + } > + return NULL; > +} > + > +static const char * wd_res_event_to_str(struct cs_fsm* fsm, > + int32_t event) > +{ > + switch (event) { > + case WD_E_CONFIG_CHANGED: > + return wd_config_changed_str; > + break; > + case WD_E_FAILURE: > + return wd_failure_str; > + break; > + } > + return NULL; > +} > + > +/* > + * returns (0 == OK, 1 == failed) > + */ > +static int32_t wd_resource_has_failed (struct resource *ref) > +{ > + hdb_handle_t resource = ref->handle; > + int res; > + char* state; > + size_t state_len; > + objdb_value_types_t type; > + time_t *last_updated; > + time_t my_time; > + size_t last_updated_len; > + > + res = api->object_key_get_typed (resource, > + "last_updated", 
(void*)&last_updated,&last_updated_len,&type); > + if (res != 0) { > + /* key does not exist. > + */ > + return 1; > + } > + res = api->object_key_get_typed (resource, > + "state", (void**)&state,&state_len,&type); > + if (res != 0 || strncmp (state, "disabled", strlen ("disabled")) == 0) { > + /* key does not exist. > + */ > + return 1; > + } > + > + my_time = time (NULL); > + > + if ((*last_updated + ref->check_timeout)< my_time) { > + log_printf (LOGSYS_LEVEL_INFO, "delayed %ld + %d< %ld", > + *last_updated, ref->check_timeout, my_time); > + return 1; > + } > + > + if ((*last_updated + ref->check_timeout)< my_time || > + strcmp (state, "bad") == 0) { > + return 1; > + } > + return 0; > +} > + > +static void wd_config_changed (struct cs_fsm* fsm, int32_t event, void * > data) > +{ > + int res; > + size_t len; > + char *state; > + objdb_value_types_t type; > + char mon_period_str[32]; > + int32_t tmp_value; > + struct resource *ref = (struct resource*)data; > + > + res = api->object_key_get_typed (ref->handle, > + "poll_period", > + (void**)&mon_period_str,&len, > + &type); > + if (res == 0) { > + tmp_value = strtol (mon_period_str, NULL, 0); > + if (tmp_value> 0&& tmp_value< 120) > + ref->check_timeout = (tmp_value * 5)/4; > + } > + > + res = api->object_key_get_typed (ref->handle, > + "recovery", (void*)&ref->recovery,&len,&type); > + if (res != 0) { > + /* key does not exist. > + */ > + log_printf (LOGSYS_LEVEL_WARNING, > + "resource %s missing a recovery key.", ref->name); > + cs_fsm_state_set(&ref->fsm, WD_S_DISABLED, ref); > + return; > + } > + res = api->object_key_get_typed (ref->handle, > + "state", (void*)&state,&len,&type); > + if (res != 0) { > + /* key does not exist. 
> + */ > + log_printf (LOGSYS_LEVEL_WARNING, > + "resource %s missing a state key.", ref->name); > + cs_fsm_state_set(&ref->fsm, WD_S_DISABLED, ref); > + return; > + } > + > + cs_fsm_state_set(&ref->fsm, WD_S_GOOD, ref); > + > + if (ref->check_timer) { > + api->timer_delete(ref->check_timer); > + } > + api->timer_add_duration((unsigned long > long)ref->check_timeout*1000000000, > + ref, > + wd_resource_check_fn,&ref->check_timer); > + > +} > + > +static void wd_resource_failed (struct cs_fsm* fsm, int32_t event, void * > data) > +{ > + struct resource* ref = (struct resource*)data; > + > + if (ref->check_timer) { > + api->timer_delete(ref->check_timer); > + } > + > + log_printf (LOGSYS_LEVEL_CRIT, "%s resource \"%s\" failed!", > + ref->recovery, (char*)ref->name); > + if (strcmp (ref->recovery, "watchdog") == 0 || > + strcmp (ref->recovery, "quit") == 0) { > + watchdog_ok = 0; > + } > + else if (strcmp (ref->recovery, "reboot") == 0) { > + //reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, > LINUX_REBOOT_CMD_RESTART, NULL); > + } > + else if (strcmp (ref->recovery, "shutdown") == 0) { > + //reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, > LINUX_REBOOT_CMD_POWER_OFF, NULL); > + } > + cs_fsm_state_set(fsm, WD_S_FAILED, data); > +} > + > +static void wd_key_changed(object_change_type_t change_type, > + hdb_handle_t parent_object_handle, > + hdb_handle_t object_handle, > + const void *object_name_pt, size_t object_name_len, > + const void *key_name_pt, size_t key_len, > + const void *key_value_pt, size_t key_value_len, > + void *priv_data_pt) > +{ > + struct resource* ref = (struct resource*)priv_data_pt; > + > + if (strcmp(key_name_pt, "last_updated") == 0 || > + strcmp(key_name_pt, "current") == 0) { > + return; > + } > +// log_printf (LOGSYS_LEVEL_WARNING, > +// "watchdog resource key changed: %s.%s=%s ref=%p.", > +// (char*)object_name_pt, (char*)key_name_pt, (char*)key_value_pt, > ref); > + > + if (ref == NULL) { > + return; > + } > + cs_fsm_process(&ref->fsm, 
WD_E_CONFIG_CHANGED, ref); > +} > + > +static void wd_object_destroyed( > + hdb_handle_t parent_object_handle, > + const void *name_pt, size_t name_len, > + void *priv_data_pt) > +{ > + struct resource* ref = (struct resource*)priv_data_pt; > + > + log_printf (LOGSYS_LEVEL_WARNING, > + "watchdog resource \"%s\" deleted from objdb!", > + (char*)name_pt); > + > + if (ref) { > + api->timer_delete(ref->check_timer); > + ref->check_timer = NULL; > + } > +} > + > +static void wd_resource_check_fn (void* resource_ref) > +{ > + struct resource* ref = (struct resource*)resource_ref; > + > + log_printf (LOGSYS_LEVEL_INFO, > + "checking watchdog resource \"%s\".", > + ref->name); > + if (wd_resource_has_failed (ref) ) { > + cs_fsm_process(&ref->fsm, WD_E_FAILURE, ref); > + log_printf (LOGSYS_LEVEL_CRIT, > + "watchdog resource \"%s\" failed!", > + (char*)ref->name); > + return; > + } > + api->timer_add_duration((unsigned long > long)ref->check_timeout*1000000000, > + ref, wd_resource_check_fn,&ref->check_timer); > +} > + > + > +static void wd_resource_create (hdb_handle_t resource_obj) > +{ > + int res; > + size_t len; > + char *state; > + objdb_value_types_t type; > + char mon_period_str[32]; > + int32_t tmp_value; > + struct resource *ref = malloc (sizeof (struct resource)); > + > + ref->handle = resource_obj; > + ref->check_timeout = WD_DEFAULT_TIMEOUT; > + ref->check_timer = NULL; > + api->object_name_get (resource_obj, > + ref->name, > + &len); > + ref->name[len] = '\0'; > + ref->fsm.name = ref->name; > + ref->fsm.table = wd_fsm_table; > + ref->fsm.entries = sizeof(wd_fsm_table) / sizeof(struct cs_fsm_entry); > + ref->fsm.curr_entry = 0; > + ref->fsm.curr_state = WD_S_DISABLED; > + ref->fsm.state_to_str = wd_res_state_to_str; > + ref->fsm.event_to_str = wd_res_event_to_str; > + api->object_priv_set (resource_obj, NULL); > + > + res = api->object_key_get_typed (resource_obj, > + "poll_period", > + (void**)&mon_period_str,&len, > + &type); > + if (res != 0) { > + log_printf 
(LOGSYS_LEVEL_ERROR, "%s : %d",__func__, res); > + len = snprintf (mon_period_str, 32, "%d", ref->check_timeout); > + api->object_key_create_typed (resource_obj, > + "poll_period",&mon_period_str, > + len, > + OBJDB_VALUETYPE_STRING); > + } > + else { > + tmp_value = strtol (mon_period_str, NULL, 0); > + if (tmp_value> 0&& tmp_value< 120) > + ref->check_timeout = (tmp_value * 5)/4; > + } > + > + api->object_track_start (resource_obj, OBJECT_TRACK_DEPTH_ONE, > + wd_key_changed, NULL, wd_object_destroyed, > + NULL, ref); > + > + res = api->object_key_get_typed (resource_obj, > + "recovery", (void*)&ref->recovery,&len,&type); > + if (res != 0) { > + /* key does not exist. > + */ > + log_printf (LOGSYS_LEVEL_WARNING, > + "resource %s missing a recovery key.", ref->name); > + return; > + } > + res = api->object_key_get_typed (resource_obj, > + "state", (void*)&state,&len,&type); > + if (res != 0) { > + /* key does not exist. > + */ > + log_printf (LOGSYS_LEVEL_WARNING, > + "resource %s missing a state key.", ref->name); > + return; > + } > + > + res = api->object_key_get_typed (resource_obj, > + "last_updated", (void*)&ref->last_updated,&len,&type); > + if (res != 0) { > + /* key does not exist. 
> + */ > + ref->last_updated = 0; > + } > + > + api->timer_add_duration((unsigned long > long)ref->check_timeout*1000000000, > + ref, > + wd_resource_check_fn,&ref->check_timer); > + > + cs_fsm_state_set(&ref->fsm, WD_S_GOOD, ref); > +} > + > + > +static void wd_tickle_fn (void* arg) > +{ > + ENTER(); > + > + if (watchdog_ok) { > + if (dog> 0) > + ioctl(dog, WDIOC_KEEPALIVE,&watchdog_ok); > + } > + else { > + log_printf (LOGSYS_LEVEL_ALERT, "NOT tickling the watchdog!"); > + } > + > + api->timer_add_duration((unsigned long long)tickle_timeout*1000000000, > NULL, > + wd_tickle_fn,&wd_timer); > +} > + > +static void wd_resource_object_created(hdb_handle_t parent_object_handle, > + hdb_handle_t object_handle, > + const void *name_pt, size_t name_len, > + void *priv_data_pt) > +{ > + wd_resource_create (object_handle); > +} > + > +static void wd_scan_resources (void) > +{ > + hdb_handle_t obj_finder; > + hdb_handle_t obj_finder2; > + hdb_handle_t resource_type; > + hdb_handle_t resource; > + int res; > + > + ENTER(); > + > + api->object_find_create ( > + OBJECT_PARENT_HANDLE, > + "resources", strlen ("resources"), > + &obj_finder); > + > + res = api->object_find_next (obj_finder,&resources_obj); > + api->object_find_destroy (obj_finder); > + if (res != 0) { > + log_printf (LOGSYS_LEVEL_INFO, "no resources."); > + return; > + } > + > + /* this will be the system or process level > + */ > + api->object_find_create ( > + resources_obj, > + NULL, 0, > + &obj_finder); > + while (api->object_find_next (obj_finder, > + &resource_type) == 0) { > + > + api->object_find_create ( > + resource_type, > + NULL, 0, > + &obj_finder2); > + > + while (api->object_find_next (obj_finder2, > + &resource) == 0) { > + > + wd_resource_create (resource); > + } > + api->object_find_destroy (obj_finder2); > + > + api->object_track_start (resource_type, OBJECT_TRACK_DEPTH_ONE, > + NULL, wd_resource_object_created, NULL, > + NULL, NULL); > + } > + api->object_find_destroy (obj_finder); > +} > + > 
+ > +static void watchdog_timeout_apply (uint32_t new) > +{ > + struct watchdog_info ident; > + > + if (new< 2) { > + watchdog_timeout = 2; > + } > + else if (new> 120) { > + watchdog_timeout = 120; > + } > + else { > + watchdog_timeout = new; > + } > + > + if (dog> 0) { > + ioctl(dog, WDIOC_GETSUPPORT,&ident); > + if (ident.options& WDIOF_SETTIMEOUT) { > + /* yay! the dog is trained. > + */ > + ioctl(dog, WDIOC_SETTIMEOUT,&watchdog_timeout); > + } > + ioctl(dog, WDIOC_GETTIMEOUT,&watchdog_timeout); > + } > + tickle_timeout = watchdog_timeout / 2; > + > + log_printf (LOGSYS_LEVEL_DEBUG, "The Watchdog timeout is %d seconds\n", > watchdog_timeout); > + log_printf (LOGSYS_LEVEL_DEBUG, "The tickle timeout is %d seconds\n", > tickle_timeout); > +} > + > +static int setup_watchdog(void) > +{ > + struct watchdog_info ident; > + > + ENTER(); > + if (access ("/dev/watchdog", W_OK) != 0) { > + log_printf (LOGSYS_LEVEL_WARNING, "No Watchdog, try modprobe<a > watchdog>"); > + dog = -1; > + return -1; > + } > + > + /* here goes, lets hope they have "Magic Close" > + */ > + dog = open("/dev/watchdog", O_WRONLY); > + > + if (dog == -1) { > + log_printf (LOGSYS_LEVEL_WARNING, "Watchdog exists but couldn't > be opened."); > + dog = -1; > + return -1; > + } > + > + /* Right we have the dog. > + * Lets see what breed it is. 
> + */ > + > + ioctl(dog, WDIOC_GETSUPPORT,&ident); > + log_printf (LOGSYS_LEVEL_INFO, "Watchdog is now been tickled by > corosync."); > + log_printf (LOGSYS_LEVEL_DEBUG, "%s", ident.identity); > + > + watchdog_timeout_apply (watchdog_timeout); > + > + ioctl(dog, WDIOC_SETOPTIONS, WDIOS_ENABLECARD); > + > + return 0; > +} > + > +static void wd_top_level_key_changed(object_change_type_t change_type, > + hdb_handle_t parent_object_handle, > + hdb_handle_t object_handle, > + const void *object_name_pt, size_t object_name_len, > + const void *key_name_pt, size_t key_len, > + const void *key_value_pt, size_t key_value_len, > + void *priv_data_pt) > +{ > + uint32_t tmp_value; > + > + ENTER(); > + if (change_type != OBJECT_KEY_DELETED&& > + strncmp ((char*)key_name_pt, "watchdog_timeout", key_value_len) > == 0) { > + tmp_value = strtol (key_value_pt, NULL, 0); > + watchdog_timeout_apply (tmp_value); > + } > + else { > + watchdog_timeout_apply (WD_DEFAULT_TIMEOUT); > + } > + log_printf (LOGSYS_LEVEL_INFO, "new(%d) tickle_timeout: %d", > change_type, tickle_timeout); > +} > + > + > +static void watchdog_timeout_get_initial (void) > +{ > + int32_t res; > + char watchdog_timeout_str[32]; > + size_t watchdog_timeout_len; > + objdb_value_types_t watchdog_timeout_type; > + uint32_t tmp_value; > + > + ENTER(); > + > + res = api->object_key_get_typed (resources_obj, > + "watchdog_timeout", > + (void**)&watchdog_timeout_str,&watchdog_timeout_len, > + &watchdog_timeout_type); > + if (res != 0) { > + watchdog_timeout_apply (WD_DEFAULT_TIMEOUT); > + > + watchdog_timeout_len = snprintf (watchdog_timeout_str, 32, > "%d", watchdog_timeout); > + api->object_key_create_typed (resources_obj, > + "watchdog_timeout",&watchdog_timeout_str, > + watchdog_timeout_len, > + OBJDB_VALUETYPE_STRING); > + } > + else { > + tmp_value = strtol (watchdog_timeout_str, NULL, 0); > + watchdog_timeout_apply (tmp_value); > + } > + > + api->object_track_start (resources_obj, OBJECT_TRACK_DEPTH_ONE, > + 
wd_top_level_key_changed, NULL, NULL, > + NULL, NULL); > + > +} > + > +static int wd_exec_init_fn ( > + struct corosync_api_v1 *corosync_api) > +{ > + hdb_handle_t obj; > + > + ENTER(); > +#ifdef COROSYNC_SOLARIS > + logsys_subsys_init(); > +#endif > + api = corosync_api; > + > + object_find_or_create (OBJECT_PARENT_HANDLE, > + &resources_obj, > + "resources", strlen ("resources")); > + object_find_or_create (resources_obj, > + &obj, > + "system", strlen ("system")); > + object_find_or_create (resources_obj, > + &obj, > + "process", strlen ("process")); > + > + watchdog_timeout_get_initial(); > + > + setup_watchdog(); > + > + wd_scan_resources(); > + > + api->timer_add_duration((unsigned long long)tickle_timeout*1000000000, > NULL, > + wd_tickle_fn,&wd_timer); > + > + return 0; > +} > + > +static int wd_exec_exit_fn (void) > +{ > + char magic = 'V'; > + ENTER(); > + > + if (dog> 0) { > + log_printf (LOGSYS_LEVEL_INFO, "magically closing the > watchdog."); > + write (dog,&magic, 1); > + } > + return 0; > +} > + > + _______________________________________________ Openais mailing list [email protected] https://lists.linux-foundation.org/mailman/listinfo/openais
