If split brain prevention is enabled at runtime and this node is active,
promote this node in the consensus service.

If disabled at runtime, the watch threads will terminate gracefully when
the plugin exits after losing connectivity to the consensus service.
---
 src/rde/rded/osaf-rded.in |  1 +
 src/rde/rded/rde_main.cc  | 59 +++++++++++++++++++++++++++++++++++++++++++----
 src/rde/rded/rde_rda.h    |  3 +++
 src/rde/rded/role.cc      | 27 +++++++++++++++++-----
 src/rde/rded/role.h       |  1 +
 5 files changed, 81 insertions(+), 10 deletions(-)

diff --git a/src/rde/rded/osaf-rded.in b/src/rde/rded/osaf-rded.in
index 391fa50..05740a4 100644
--- a/src/rde/rded/osaf-rded.in
+++ b/src/rde/rded/osaf-rded.in
@@ -28,6 +28,7 @@ else
        unset args
        . $pkgsysconfdir/nid.conf
        . $pkgsysconfdir/rde.conf
+       export FMS_CONF_FILE=$pkgsysconfdir/fmd.conf
 fi     
 
 binary=$pkglibdir/$osafprog
diff --git a/src/rde/rded/rde_main.cc b/src/rde/rded/rde_main.cc
index b4c8429..33ed5c4 100644
--- a/src/rde/rded/rde_main.cc
+++ b/src/rde/rded/rde_main.cc
@@ -39,11 +39,11 @@
 #include "osaf/consensus/consensus.h"
 #include "rde/rded/rde_cb.h"
 #include "rde/rded/role.h"
-#include "rde_cb.h"
 
 #define RDA_MAX_CLIENTS 32
 
-enum { FD_TERM = 0, FD_AMF = 1, FD_MBX, FD_RDA_SERVER, FD_CLIENT_START };
+enum { FD_TERM = 0, FD_AMF = 1, FD_MBX, FD_RDA_SERVER,
+       FD_SIGHUP, FD_CLIENT_START };
 
 static void SendPeerInfoResp(MDS_DEST mds_dest);
 static void CheckForSplitBrain(const rde_msg *msg);
@@ -64,6 +64,7 @@ const char *rde_msg_name[] = {"-",
 static RDE_CONTROL_BLOCK _rde_cb;
 static RDE_CONTROL_BLOCK *rde_cb = &_rde_cb;
 static NCS_SEL_OBJ usr1_sel_obj;
+static NCS_SEL_OBJ sighup_sel_obj;
 static NODE_ID own_node_id;
 static Role *role;
 
@@ -82,6 +83,10 @@ static void sigusr1_handler(int sig) {
   ncs_sel_obj_ind(&usr1_sel_obj);
 }
 
+static void sighup_handler(int signum, siginfo_t *info, void *ptr) {
+  ncs_sel_obj_ind(&sighup_sel_obj);
+}
+
 static int fd_to_client_ixd(int fd) {
   int i;
   RDE_RDA_CB *rde_rda_cb = &rde_cb->rde_rda_cb;
@@ -128,6 +133,10 @@ static void handle_mbx_event() {
 
       // get current active controller
       Consensus consensus_service;
+      if (consensus_service.IsEnabled() == false) {
+        // disabled during runtime
+        break;
+      }
       std::string active_controller = consensus_service.CurrentActive();
 
       LOG_NO("New active controller notification from consensus service");
@@ -179,11 +188,15 @@ static void handle_mbx_event() {
       rde_cb->monitor_takeover_req_thread_running = false;
 
       if (role->role() == PCS_RDA_ACTIVE) {
-        LOG_NO("Received takeover request '%s'. Our network size is %zu",
+        TRACE("Received takeover request '%s'. Our network size is %zu",
                 msg->info.takeover_request,
                rde_cb->cluster_members.size());
 
         Consensus consensus_service;
+        if (consensus_service.IsEnabled() == false) {
+          // disabled during runtime
+          break;
+        }
         Consensus::TakeoverState state =
             consensus_service.HandleTakeoverRequest(
                 rde_cb->cluster_members.size(),
@@ -230,7 +243,7 @@ static void handle_mbx_event() {
           }
         }
 
-        LOG_NO("Rejected takeover request");
+        TRACE("Rejected takeover request");
 
         rde_cb->monitor_takeover_req_thread_running = true;
         consensus_service.MonitorTakeoverRequest(Role::MonitorCallback,
@@ -284,6 +297,8 @@ static int initialize_rde() {
   if (getenv("SA_AMF_COMPONENT_NAME") == nullptr)
     rde_cb->rde_amf_cb.nid_started = true;
 
+  rde_rda_cb->fmd_conf_file = base::GetEnv("FMS_CONF_FILE", "");
+
   if ((rc = ncs_core_agents_startup()) != NCSCC_RC_SUCCESS) {
     LOG_ER("ncs_core_agents_startup FAILED");
     goto init_failed;
@@ -300,6 +315,12 @@ static int initialize_rde() {
     goto init_failed;
   }
 
+  rc = ncs_sel_obj_create(&sighup_sel_obj);
+  if (rc != NCSCC_RC_SUCCESS) {
+    LOG_ER("ncs_sel_obj_create FAILED");
+    goto init_failed;
+  }
+
   if ((rc = ncs_ipc_create(&rde_cb->mbx)) != NCSCC_RC_SUCCESS) {
     LOG_ER("ncs_ipc_create FAILED");
     goto init_failed;
@@ -316,6 +337,16 @@ static int initialize_rde() {
     goto init_failed;
   }
 
+  struct sigaction sighup;
+  sigemptyset(&sighup.sa_mask);
+  sighup.sa_sigaction = sighup_handler;
+  sighup.sa_flags = SA_SIGINFO;
+
+  if (sigaction(SIGHUP, &sighup, NULL) != 0) {
+    LOG_ER("registering SIGHUP FAILED: %s", strerror(errno));
+    goto init_failed;
+  }
+
   if (rde_discovery_mds_register() != NCSCC_RC_SUCCESS) {
     LOG_ER("rde_discovery_mds_register() failed");
     rc = NCSCC_RC_FAILURE;
@@ -360,6 +391,9 @@ int main(int argc, char *argv[]) {
                                                   : rde_cb->rde_amf_cb.amf_fd;
   fds[FD_AMF].events = POLLIN;
 
+  fds[FD_SIGHUP].fd = sighup_sel_obj.rmv_obj;
+  fds[FD_SIGHUP].events = POLLIN;
+
   /* Mailbox */
   fds[FD_MBX].fd = mbx_sel_obj.rmv_obj;
   fds[FD_MBX].events = POLLIN;
@@ -420,6 +454,23 @@ int main(int argc, char *argv[]) {
       }
     }
 
+    if (fds[FD_SIGHUP].revents & POLLIN) {
+      ncs_sel_obj_rmv_ind(&sighup_sel_obj, true, true);
+      Consensus consensus_service;
+      bool old_setting = consensus_service.IsEnabled();
+      consensus_service.ReloadConfiguration();
+      bool new_setting = consensus_service.IsEnabled();
+      if (role->role() == PCS_RDA_ACTIVE) {
+        if (old_setting == false && new_setting == true) {
+          // if active and switched on, obtain lock
+          role->PromoteNodeLate();
+        } else if (old_setting == true && new_setting == false) {
+          // if active and switched off
+          // @todo remove lock in a new thread
+        }
+      }
+    }
+
     if (fds[FD_MBX].revents & POLLIN) handle_mbx_event();
 
     if (fds[FD_RDA_SERVER].revents & POLLIN) {
diff --git a/src/rde/rded/rde_rda.h b/src/rde/rded/rde_rda.h
index 82bbc89..3483538 100644
--- a/src/rde/rded/rde_rda.h
+++ b/src/rde/rded/rde_rda.h
@@ -37,6 +37,8 @@
 #include <sys/socket.h>
 #include <sys/un.h>
 #include <cstdint>
+#include <string>
+#include "base/getenv.h"
 
 class Role;
 
@@ -85,6 +87,7 @@ struct RDE_RDA_CB {
   int client_count;
   Role *role;
   RDE_RDA_CLIENT clients[MAX_RDA_CLIENTS];
+  std::string fmd_conf_file;
 };
 
 /***************************************************************\
diff --git a/src/rde/rded/role.cc b/src/rde/rded/role.cc
index 5f107ed..499f7c8 100644
--- a/src/rde/rded/role.cc
+++ b/src/rde/rded/role.cc
@@ -77,8 +77,8 @@ void Role::MonitorCallback(const std::string& key, const 
std::string& new_value,
     size_t len = request.length() + 1;
     msg->info.takeover_request = new char[len];
     strncpy(msg->info.takeover_request, request.c_str(), len);
-    LOG_NO("Sending takeover request '%s' to main thread",
-          msg->info.takeover_request);
+    TRACE("Sending takeover request '%s' to main thread",
+           msg->info.takeover_request);
     if (consensus_service.SelfFence(request) == false &&
         consensus_service.PrioritisePartitionSize() == true) {
       // don't send this to the main thread straight away, as it will
@@ -161,11 +161,13 @@ void Role::NodePromoted() {
 
   // register for callback if active controller is changed
   // in consensus service
-  if (cb->monitor_lock_thread_running == false) {
+  if (consensus_service.IsEnabled() == true &&
+      cb->monitor_lock_thread_running == false) {
     cb->monitor_lock_thread_running = true;
     consensus_service.MonitorLock(MonitorCallback, cb->mbx);
   }
-  if (cb->monitor_takeover_req_thread_running == false) {
+  if (consensus_service.IsEnabled() == true &&
+      cb->monitor_takeover_req_thread_running == false) {
     cb->monitor_takeover_req_thread_running = true;
     consensus_service.MonitorTakeoverRequest(MonitorCallback, cb->mbx);
   }
@@ -276,11 +278,13 @@ uint32_t Role::SetRole(PCS_RDA_ROLE new_role) {
       Consensus consensus_service;
       RDE_CONTROL_BLOCK* cb = rde_get_control_block();
       cb->state = State::kActiveFailover;
-      if (cb->monitor_lock_thread_running == false) {
+      if (consensus_service.IsEnabled() == true &&
+          cb->monitor_lock_thread_running == false) {
         cb->monitor_lock_thread_running = true;
         consensus_service.MonitorLock(MonitorCallback, cb->mbx);
       }
-      if (cb->monitor_takeover_req_thread_running == false) {
+      if (consensus_service.IsEnabled() == true &&
+          cb->monitor_takeover_req_thread_running == false) {
         cb->monitor_takeover_req_thread_running = true;
         consensus_service.MonitorTakeoverRequest(MonitorCallback, cb->mbx);
       }
@@ -334,3 +338,14 @@ void Role::SetPeerState(PCS_RDA_ROLE node_role, NODE_ID 
node_id) {
     }
   }
 }
+
+void Role::PromoteNodeLate() {
+  TRACE_ENTER();
+
+  // we are already active and split brain prevention has been
+  // enabled during runtime, we need to obtain lock
+  RDE_CONTROL_BLOCK* cb = rde_get_control_block();
+  std::thread(&Role::PromoteNode,
+              this, cb->cluster_members.size(),
+              true).detach();
+}
diff --git a/src/rde/rded/role.h b/src/rde/rded/role.h
index 1920f59..ecb67cf 100644
--- a/src/rde/rded/role.h
+++ b/src/rde/rded/role.h
@@ -44,6 +44,7 @@ class Role {
   static void MonitorCallback(const std::string& key,
                               const std::string& new_value, SYSF_MBX mbx);
   void NodePromoted();
+  void PromoteNodeLate();
 
  private:
   static const uint64_t kDefaultDiscoverPeerTimeout = 2000;
-- 
2.7.4



_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to