---
00-README.conf | 10 +++++++++-
src/base/osaf_utility.c | 44 ++++++++++++++++++++++++++++++++++++++++++++
src/base/osaf_utility.h | 5 +++++
src/clm/clmnd/main.c | 1 +
src/nid/nodeinit.cc | 2 ++
5 files changed, 61 insertions(+), 1 deletion(-)
diff --git a/00-README.conf b/00-README.conf
index f64aa031c..5b06b15d5 100644
--- a/00-README.conf
+++ b/00-README.conf
@@ -610,4 +610,12 @@ A message will be written if the latency is > 0.1 second,
example below shows a
messages.1:Sep 12 13:09:26 SC-1 osafimmd[26732]: NO MDS timerfd expired 10
times
-If the latency exceeds 4 seconds a sigalrm will be sent and the process will
be aborted.
\ No newline at end of file
+If the latency exceeds 4 seconds a sigalrm will be sent and the process will
be aborted.
+
+If the CLM admin command for cluster reboot is issued, the environment
+variable OPENSAF_CLUSTER_REBOOT_WAIT_TIME_SEC can be set in the opensafd
+script to specify how long nodes other than the active node wait before they
+start. The default is two seconds. A file, "clm_cluster_reboot_in_progress",
+is created on each node except the active node. This file indicates that a
+cluster reboot is in progress and that those nodes need to delay their start
+in order to give the active node a head start.
diff --git a/src/base/osaf_utility.c b/src/base/osaf_utility.c
index 230cd7e0f..6ee6c3d8f 100644
--- a/src/base/osaf_utility.c
+++ b/src/base/osaf_utility.c
@@ -23,9 +23,53 @@
#include <string.h>
#include <sys/types.h>
#include <sys/wait.h>
+
+#include <limits.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <fcntl.h>
#include "base/ncssysf_def.h"
#include "osaf/configmake.h"
+/**
+ * Delay the start of this node while a cluster reboot is in progress.
+ *
+ * If the marker file PKGLOGDIR/<kClmClusterRebootInProgress> exists, sleep
+ * for the number of seconds given by the environment variable
+ * OPENSAF_CLUSTER_REBOOT_WAIT_TIME_SEC and then remove the marker file.
+ * kDfltClusterRebootWaitTimeSec is used when the variable is unset, holds no
+ * number, is negative or does not fit in an unsigned int. If the marker file
+ * cannot be stat'ed the function returns immediately.
+ *
+ * All outcomes are reported through syslog; nothing is returned.
+ */
+void osaf_wait_for_active_to_start(void)
+{
+	struct stat statbuf;
+	/* PATH_MAX, not NAME_MAX: the buffer holds a full path. */
+	char file[PATH_MAX];
+	unsigned int wait_time = kDfltClusterRebootWaitTimeSec;
+	const char *wait_time_str =
+	    getenv("OPENSAF_CLUSTER_REBOOT_WAIT_TIME_SEC");
+	int len;
+
+	if (wait_time_str != NULL) {
+		char *end = NULL;
+		long value;
+
+		errno = 0;
+		value = strtol(wait_time_str, &end, 0);
+		/*
+		 * Reject values that would wrap when stored in an unsigned
+		 * int (e.g. "-1" would otherwise become a sleep of UINT_MAX
+		 * seconds) and strings without any digits.
+		 */
+		if (errno != 0 || end == wait_time_str || value < 0 ||
+		    (unsigned long)value > UINT_MAX) {
+			syslog(LOG_WARNING,
+			       "Invalid OPENSAF_CLUSTER_REBOOT_WAIT_TIME_SEC "
+			       "'%s', using %u second(s)",
+			       wait_time_str, wait_time);
+		} else {
+			wait_time = (unsigned int)value;
+		}
+	}
+
+	len = snprintf(file, sizeof(file), PKGLOGDIR "/%s",
+		       kClmClusterRebootInProgress);
+	if (len < 0 || (size_t)len >= sizeof(file)) {
+		/* A truncated path would name the wrong file. */
+		syslog(LOG_ERR, "Reboot file path too long, startup continue...");
+		return;
+	}
+
+	if (stat(file, &statbuf) != 0) {
+		syslog(LOG_NOTICE, "Reboot file %s not found, startup continue...", file);
+		return;
+	}
+
+	syslog(LOG_NOTICE, "Cluster reboot in progress, this node will start in %u second(s)", wait_time);
+
+	sleep(wait_time);
+
+	/* Remove the marker so that the next ordinary start is not delayed. */
+	if (unlink(file) == -1) {
+		syslog(LOG_ERR, "cannot remove file %s: %s", file,
+		       strerror(errno));
+	}
+}
+
+/**
+ * Create the marker file PKGLOGDIR/<kClmClusterRebootInProgress>.
+ *
+ * The file is opened with O_CREAT (mode 0644) and closed again right away;
+ * an already existing file is left untouched. Failures are reported through
+ * syslog; nothing is returned.
+ */
+void osaf_create_cluster_reboot_in_progress_file(void)
+{
+	/* PATH_MAX, not NAME_MAX: the buffer holds a full path. */
+	char file[PATH_MAX];
+	int fd;
+	int len = snprintf(file, sizeof(file), PKGLOGDIR "/%s",
+			   kClmClusterRebootInProgress);
+
+	if (len < 0 || (size_t)len >= sizeof(file)) {
+		/* A truncated path would create the wrong file. */
+		syslog(LOG_ERR, "Reboot file path too long, file not created");
+		return;
+	}
+
+	fd = open(file, O_RDWR | O_CREAT, 0644);
+	if (fd < 0) {
+		syslog(LOG_ERR, "Open %s failed, %s", file, strerror(errno));
+		return;
+	}
+
+	if (close(fd) == -1) {
+		syslog(LOG_ERR, "Close %s failed, %s", file, strerror(errno));
+	}
+}
+
void osaf_abort(long i_cause)
{
syslog(LOG_ERR, "osaf_abort(%ld) called from %p with errno=%d", i_cause,
diff --git a/src/base/osaf_utility.h b/src/base/osaf_utility.h
index b935c5003..f7b5a07b3 100644
--- a/src/base/osaf_utility.h
+++ b/src/base/osaf_utility.h
@@ -30,6 +30,8 @@
extern "C" {
#endif
+#define kClmClusterRebootInProgress "clm_cluster_reboot_in_progress"
+enum { kDfltClusterRebootWaitTimeSec = 2 };
enum { kOsafUseSafeReboot = 1 };
/**
@@ -71,6 +73,9 @@ extern void osaf_abort(long i_cause) __attribute__((
extern void osaf_safe_reboot(void) __attribute__((nothrow));
+extern void osaf_wait_for_active_to_start(void);
+extern void osaf_create_cluster_reboot_in_progress_file(void);
+
static inline void osaf_mutex_lock_ordie(pthread_mutex_t* io_mutex) {
int result = pthread_mutex_lock(io_mutex);
if (result != 0) osaf_abort(result);
diff --git a/src/clm/clmnd/main.c b/src/clm/clmnd/main.c
index 3a8479600..2801c218f 100644
--- a/src/clm/clmnd/main.c
+++ b/src/clm/clmnd/main.c
@@ -122,6 +122,7 @@ static uint32_t clmna_mds_dec(struct ncsmds_callback_info
*info)
// Reboot will be performed by CLMS for this node.
if (clmna_cb->node_info.node_id !=
msg->info.reboot_info.node_id) {
+ osaf_create_cluster_reboot_in_progress_file();
osaf_safe_reboot();
}
break;
diff --git a/src/nid/nodeinit.cc b/src/nid/nodeinit.cc
index 9eddd743d..5a4b73cc6 100644
--- a/src/nid/nodeinit.cc
+++ b/src/nid/nodeinit.cc
@@ -1625,6 +1625,8 @@ int main(int argc, char *argv[]) {
TRACE_ENTER();
+ osaf_wait_for_active_to_start();
+
#ifdef RLIMIT_RTPRIO
struct rlimit mylimit;
mylimit.rlim_max = mylimit.rlim_cur = sched_get_priority_max(SCHED_RR);
--
2.14.1
------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel