* consult with the consensus service before promoting a node to active
* add a watch thread and self-fence if it detects that the active controller
  has changed (if remote fencing is disabled)
---
 src/rde/Makefile.am       |  3 ++-
 src/rde/rded/osaf-rded.in |  4 ++++
 src/rde/rded/rde_cb.h     |  4 +++-
 src/rde/rded/rde_main.cc  | 38 +++++++++++++++++++++++++++++++++-----
 src/rde/rded/role.cc      | 45 ++++++++++++++++++++++++++++++++++++++++++++-
 src/rde/rded/role.h       |  3 +++
 6 files changed, 89 insertions(+), 8 deletions(-)

diff --git a/src/rde/Makefile.am b/src/rde/Makefile.am
index c967f9fc4..182f347ab 100644
--- a/src/rde/Makefile.am
+++ b/src/rde/Makefile.am
@@ -58,7 +58,8 @@ bin_osafrded_SOURCES = \
 
 bin_osafrded_LDADD = \
        lib/libSaAmf.la \
-       lib/libopensaf_core.la
+       lib/libopensaf_core.la \
+       lib/libosaf_common.la
 
 bin_rdegetrole_CPPFLAGS = \
        $(AM_CPPFLAGS)
diff --git a/src/rde/rded/osaf-rded.in b/src/rde/rded/osaf-rded.in
index 1c1786c8d..1697936a7 100644
--- a/src/rde/rded/osaf-rded.in
+++ b/src/rde/rded/osaf-rded.in
@@ -28,6 +28,10 @@ else
        . $pkgsysconfdir/rde.conf
 fi     
 
+if [ -f "$pkgsysconfdir/fmd.conf" ]; then
+  . "$pkgsysconfdir/fmd.conf"
+fi
+
 binary=$pkglibdir/$osafprog
 pidfile=$pkgpiddir/$osafprog.pid
 tracefile=$pkglogdir/$osafprog.log
diff --git a/src/rde/rded/rde_cb.h b/src/rde/rded/rde_cb.h
index d2a3d46b2..fc100849a 100644
--- a/src/rde/rded/rde_cb.h
+++ b/src/rde/rded/rde_cb.h
@@ -39,13 +39,15 @@ struct RDE_CONTROL_BLOCK {
   bool task_terminate;
   RDE_RDA_CB rde_rda_cb;
   RDE_AMF_CB rde_amf_cb;
+  bool monitor_lock_thread_running;
 };
 
 enum RDE_MSG_TYPE {
   RDE_MSG_PEER_UP = 1,
   RDE_MSG_PEER_DOWN = 2,
   RDE_MSG_PEER_INFO_REQ = 3,
-  RDE_MSG_PEER_INFO_RESP = 4
+  RDE_MSG_PEER_INFO_RESP = 4,
+  RDE_MSG_NEW_ACTIVE_CALLBACK = 5
 };
 
 struct rde_peer_info {
diff --git a/src/rde/rded/rde_main.cc b/src/rde/rded/rde_main.cc
index 0298bf3ff..082c1c040 100644
--- a/src/rde/rded/rde_main.cc
+++ b/src/rde/rded/rde_main.cc
@@ -28,6 +28,7 @@
 #include <cerrno>
 #include <cstdlib>
 #include <cstring>
+#include "osaf/consensus/service.h"
 #include "base/daemon.h"
 #include "base/logtrace.h"
 #include "base/osaf_poll.h"
@@ -37,6 +38,7 @@
 #include <saAmf.h>
 #include "rde/rded/rde_cb.h"
 #include "rde/rded/role.h"
+#include "base/conf.h"
 
 #define RDA_MAX_CLIENTS 32
 
@@ -92,10 +94,6 @@ static void handle_mbx_event() {
   TRACE_ENTER();
 
   msg = reinterpret_cast<rde_msg *>(ncs_ipc_non_blk_recv(&rde_cb->mbx));
-  TRACE("Received %s from node 0x%x with state %s. My state is %s",
-        rde_msg_name[msg->type], msg->fr_node_id,
-        Role::to_string(msg->info.peer_info.ha_role),
-        Role::to_string(role->role()));
 
   switch (msg->type) {
     case RDE_MSG_PEER_INFO_REQ:
@@ -118,6 +116,34 @@ static void handle_mbx_event() {
     case RDE_MSG_PEER_DOWN:
       LOG_NO("Peer down on node 0x%x", msg->fr_node_id);
       break;
+   case RDE_MSG_NEW_ACTIVE_CALLBACK:
+      {
+        const std::string my_node = base::Conf::NodeName();
+        rde_cb->monitor_lock_thread_running = false;
+
+        // get current active controller
+        Consensus consensus_service;
+        std::string active_controller = consensus_service.CurrentActive();
+
+        LOG_NO("New active controller notification from consensus service");
+
+        if (role->role() == PCS_RDA_ACTIVE) {
+          if (my_node.compare(active_controller) != 0) {
+            // we are meant to be active, but consensus service doesn't think so
+            LOG_WA("Role does not match consensus service. New controller: %s",
+              active_controller.c_str());
+            if (consensus_service.IsRemoteFencingEnabled() == false ) {
+              LOG_ER("Probable split-brain. Rebooting this node");
+              opensaf_reboot(0, nullptr, "Split-brain detected by consensus service");
+            }
+          }
+
+          // register for callback
+          rde_cb->monitor_lock_thread_running = true;
+          consensus_service.MonitorLock(Role::MonitorCallback, rde_cb->mbx);
+        }
+      }
+      break;
     default:
       LOG_ER("%s: discarding unknown message type %u", __FUNCTION__, msg->type);
       break;
@@ -192,6 +218,7 @@ static int initialize_rde() {
     goto init_failed;
   }
 
+  rde_cb->monitor_lock_thread_running = false;
   rc = NCSCC_RC_SUCCESS;
 
 init_failed:
@@ -205,11 +232,12 @@ int main(int argc, char *argv[]) {
   NCS_SEL_OBJ mbx_sel_obj;
   RDE_RDA_CB *rde_rda_cb = &rde_cb->rde_rda_cb;
   int term_fd;
-
   opensaf_reboot_prepare();
 
   daemonize(argc, argv);
 
+  base::Conf::InitNodeName();
+
   if (initialize_rde() != NCSCC_RC_SUCCESS) goto init_failed;
 
   mbx_sel_obj = ncs_ipc_get_sel_obj(&rde_cb->mbx);
diff --git a/src/rde/rded/role.cc b/src/rde/rded/role.cc
index f7511f0d8..c821aeb33 100644
--- a/src/rde/rded/role.cc
+++ b/src/rde/rded/role.cc
@@ -27,7 +27,9 @@
 #include "base/process.h"
 #include "base/time.h"
 #include "base/ncs_main_papi.h"
+#include "base/ncssysf_def.h"
 #include "rde/rded/rde_cb.h"
+#include "osaf/consensus/service.h"
 
 const char* const Role::role_names_[] = {"Undefined", "ACTIVE",    "STANDBY",
                                          "QUIESCED",  "QUIESCING", "Invalid"};
@@ -42,6 +44,20 @@ const char* Role::to_string(PCS_RDA_ROLE role) {
              : role_names_[0];
 }
 
+// Callback handed to Consensus::MonitorLock(); key and new_value are unused.
+void Role::MonitorCallback(const std::string& key,
+  const std::string& new_value, SYSF_MBX mbx)
+{
+  TRACE_ENTER();
+  // Zero-fill: only `type` is set here; other fields must not be heap garbage.
+  rde_msg* msg = static_cast<rde_msg *>(calloc(1, sizeof(rde_msg)));
+  osafassert(msg != nullptr);  // fail loudly instead of dereferencing NULL
+  msg->type = RDE_MSG_NEW_ACTIVE_CALLBACK;
+
+  uint32_t status = m_NCS_IPC_SEND(&mbx, msg, NCS_IPC_PRIORITY_NORMAL);
+  osafassert(status == NCSCC_RC_SUCCESS);
+}
+
 Role::Role(NODE_ID own_node_id)
     : known_nodes_{},
       role_{PCS_RDA_QUIESCED},
@@ -61,10 +77,26 @@ timespec* Role::Poll(timespec* ts) {
       *ts = election_end_time_ - now;
       timeout = ts;
     } else {
+      SaAisErrorT rc;
+      Consensus consensus_service;
+      rc = consensus_service.PromoteThisNode();
+      if (rc != SA_AIS_OK) {
+        LOG_ER("Unable to set active controller in consensus service");
+        opensaf_reboot(0, nullptr, "Unable to set active controller in consensus service");
+      }
+
       ExecutePreActiveScript();
       LOG_NO("Switched to ACTIVE from %s", to_string(role()));
       role_ = PCS_RDA_ACTIVE;
       rde_rda_send_role(role_);
+
+      // register for callback if active controller is changed
+      // in consensus service
+      RDE_CONTROL_BLOCK* cb = rde_get_control_block();
+      if (cb->monitor_lock_thread_running == false) {
+        cb->monitor_lock_thread_running = true;
+        consensus_service.MonitorLock(MonitorCallback, cb->mbx);
+      }
     }
   }
   return timeout;
@@ -91,7 +123,18 @@ uint32_t Role::SetRole(PCS_RDA_ROLE new_role) {
   }
   if (new_role != old_role) {
     LOG_NO("RDE role set to %s", to_string(new_role));
-    if (new_role == PCS_RDA_ACTIVE) ExecutePreActiveScript();
+    if (new_role == PCS_RDA_ACTIVE) {
+      ExecutePreActiveScript();
+
+      // register for callback if active controller is changed
+      // in consensus service
+      Consensus consensus_service;
+      RDE_CONTROL_BLOCK* cb = rde_get_control_block();
+      if (cb->monitor_lock_thread_running == false) {
+        cb->monitor_lock_thread_running = true;
+        consensus_service.MonitorLock(MonitorCallback, cb->mbx);
+      }
+    }
     role_ = new_role;
     if (new_role == PCS_RDA_UNDEFINED) {
       known_nodes_.clear();
diff --git a/src/rde/rded/role.h b/src/rde/rded/role.h
index 20219b535..bee983828 100644
--- a/src/rde/rded/role.h
+++ b/src/rde/rded/role.h
@@ -21,6 +21,7 @@
 #include <time.h>
 #include <cstdint>
 #include <set>
+#include <string>
 #include "base/macros.h"
 #include "mds/mds_papi.h"
 #include "rde/agent/rda_papi.h"
@@ -38,6 +39,8 @@ class Role {
   uint32_t SetRole(PCS_RDA_ROLE new_role);
   PCS_RDA_ROLE role() const;
   static const char* to_string(PCS_RDA_ROLE role);
+  static void MonitorCallback(const std::string& key,
+    const std::string& new_value, SYSF_MBX mbx);
 
  private:
   static const uint64_t kDefaultDiscoverPeerTimeout = 2000;
-- 
2.14.1


------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to