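Currently, the consensus code that promotes a node to active is run
from the main thread. We can improve rded's responsiveness by moving
this code into a separate thread, which notifies the main thread of a
successful promotion via a new RDE_MSG_ACTIVE_PROMOTION_SUCCESS message.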
---
 src/rde/rded/rde_cb.h    |  3 +-
 src/rde/rded/rde_main.cc |  6 +++-
 src/rde/rded/role.cc     | 82 ++++++++++++++++++++++++++++++------------------
 src/rde/rded/role.h      |  2 ++
 4 files changed, 61 insertions(+), 32 deletions(-)

diff --git a/src/rde/rded/rde_cb.h b/src/rde/rded/rde_cb.h
index f5ad689c3..877687341 100644
--- a/src/rde/rded/rde_cb.h
+++ b/src/rde/rded/rde_cb.h
@@ -53,7 +53,8 @@ enum RDE_MSG_TYPE {
   RDE_MSG_NEW_ACTIVE_CALLBACK = 5,
   RDE_MSG_NODE_UP = 6,
   RDE_MSG_NODE_DOWN = 7,
-  RDE_MSG_TAKEOVER_REQUEST_CALLBACK = 8
+  RDE_MSG_TAKEOVER_REQUEST_CALLBACK = 8,
+  RDE_MSG_ACTIVE_PROMOTION_SUCCESS = 9
 };
 
 struct rde_peer_info {
diff --git a/src/rde/rded/rde_main.cc b/src/rde/rded/rde_main.cc
index c5b4b8283..c59aa4536 100644
--- a/src/rde/rded/rde_main.cc
+++ b/src/rde/rded/rde_main.cc
@@ -55,7 +55,8 @@ const char *rde_msg_name[] = {"-",
                               "RDE_MSG_NEW_ACTIVE_CALLBACK(5)"
                               "RDE_MSG_NODE_UP(6)",
                               "RDE_MSG_NODE_DOWN(7)",
-                              "RDE_MSG_TAKEOVER_REQUEST_CALLBACK(8)"};
+                              "RDE_MSG_TAKEOVER_REQUEST_CALLBACK(8)",
+                              "RDE_MSG_ACTIVE_PROMOTION_SUCCESS(9)"};
 
 static RDE_CONTROL_BLOCK _rde_cb;
 static RDE_CONTROL_BLOCK *rde_cb = &_rde_cb;
@@ -186,6 +187,9 @@ static void handle_mbx_event() {
         LOG_WA("Received takeover request when not active");
       }
     } break;
+    case RDE_MSG_ACTIVE_PROMOTION_SUCCESS:
+      role->NodePromoted();
+      break;
     default:
       LOG_ER("%s: discarding unknown message type %u", __FUNCTION__, msg->type);
       break;
diff --git a/src/rde/rded/role.cc b/src/rde/rded/role.cc
index 1b5a6ae89..b6a5df51a 100644
--- a/src/rde/rded/role.cc
+++ b/src/rde/rded/role.cc
@@ -22,6 +22,7 @@
 #include "rde/rded/role.h"
 #include <cinttypes>
 #include <cstdint>
+#include <thread>
 #include "base/getenv.h"
 #include "base/logtrace.h"
 #include "base/ncs_main_papi.h"
@@ -63,6 +64,55 @@ void Role::MonitorCallback(const std::string& key, const std::string& new_value,
   osafassert(status == NCSCC_RC_SUCCESS);
 }
 
+// Thread entry: asks the consensus service to make this node the active
+// controller and, only on success, posts RDE_MSG_ACTIVE_PROMOTION_SUCCESS to
+// the rded mailbox. cluster_size is forwarded to Consensus::PromoteThisNode().
+void Role::PromoteNode(const uint64_t cluster_size) {
+  TRACE_ENTER();
+  Consensus consensus_service;
+  const SaAisErrorT rc = consensus_service.PromoteThisNode(true, cluster_size);
+  if (rc == SA_AIS_ERR_EXIST) {
+    LOG_WA("Another controller is already active");
+    return;
+  }
+  if (rc != SA_AIS_OK) {
+    LOG_ER("Unable to set active controller in consensus service");
+    opensaf_reboot(0, nullptr,
+                   "Unable to set active controller in consensus service");
+    // Never report success for a failed promotion if opensaf_reboot() returns.
+    return;
+  }
+
+  // send msg to main thread; it completes the switch in Role::NodePromoted()
+  RDE_CONTROL_BLOCK* cb = rde_get_control_block();
+  rde_msg* msg = static_cast<rde_msg*>(malloc(sizeof(rde_msg)));
+  osafassert(msg != nullptr);  // fail loudly instead of writing through null
+  msg->type = RDE_MSG_ACTIVE_PROMOTION_SUCCESS;
+  const uint32_t status = m_NCS_IPC_SEND(&cb->mbx, msg, NCS_IPC_PRIORITY_HIGH);
+  osafassert(status == NCSCC_RC_SUCCESS);
+}
+
+// Completes the switch to ACTIVE: runs the pre-active script, sends the new
+// role via rde_rda_send_role(), and registers the consensus-service monitors.
+void Role::NodePromoted() {
+  ExecutePreActiveScript();
+  LOG_NO("Switched to ACTIVE from %s", to_string(role()));
+  role_ = PCS_RDA_ACTIVE;
+  rde_rda_send_role(role_);
+
+  Consensus consensus;
+  RDE_CONTROL_BLOCK* control_block = rde_get_control_block();
+  // Register each callback only when its "running" flag is not already set.
+  if (!control_block->monitor_lock_thread_running) {
+    control_block->monitor_lock_thread_running = true;
+    consensus.MonitorLock(MonitorCallback, control_block->mbx);
+  }
+  if (!control_block->monitor_takeover_req_thread_running) {
+    control_block->monitor_takeover_req_thread_running = true;
+    consensus.MonitorTakeoverRequest(MonitorCallback, control_block->mbx);
+  }
+}
+
 Role::Role(NODE_ID own_node_id)
     : known_nodes_{},
       role_{PCS_RDA_QUIESCED},
@@ -83,36 +133,8 @@ timespec* Role::Poll(timespec* ts) {
       timeout = ts;
     } else {
       RDE_CONTROL_BLOCK* cb = rde_get_control_block();
-      SaAisErrorT rc;
-      Consensus consensus_service;
-
-      rc = consensus_service.PromoteThisNode(true, cb->cluster_members.size());
-      if (rc != SA_AIS_OK && rc != SA_AIS_ERR_EXIST) {
-        LOG_ER("Unable to set active controller in consensus service");
-        opensaf_reboot(0, nullptr,
-                       "Unable to set active controller in consensus service");
-      }
-
-      if (rc == SA_AIS_ERR_EXIST) {
-        LOG_WA("Another controller is already active");
-        return timeout;
-      }
-
-      ExecutePreActiveScript();
-      LOG_NO("Switched to ACTIVE from %s", to_string(role()));
-      role_ = PCS_RDA_ACTIVE;
-      rde_rda_send_role(role_);
-
-      // register for callback if active controller is changed
-      // in consensus service
-      if (cb->monitor_lock_thread_running == false) {
-        cb->monitor_lock_thread_running = true;
-        consensus_service.MonitorLock(MonitorCallback, cb->mbx);
-      }
-      if (cb->monitor_takeover_req_thread_running == false) {
-        cb->monitor_takeover_req_thread_running = true;
-        consensus_service.MonitorTakeoverRequest(MonitorCallback, cb->mbx);
-      }
+      std::thread(&Role::PromoteNode,
+                 this, cb->cluster_members.size()).detach();
     }
   }
   return timeout;
diff --git a/src/rde/rded/role.h b/src/rde/rded/role.h
index 59a850988..9780debd0 100644
--- a/src/rde/rded/role.h
+++ b/src/rde/rded/role.h
@@ -41,6 +41,7 @@ class Role {
   static const char* to_string(PCS_RDA_ROLE role);
   static void MonitorCallback(const std::string& key,
                               const std::string& new_value, SYSF_MBX mbx);
+  void NodePromoted();
 
  private:
   static const uint64_t kDefaultDiscoverPeerTimeout = 2000;
@@ -48,6 +49,7 @@ class Role {
   void ExecutePreActiveScript();
   void ResetElectionTimer();
   uint32_t UpdateMdsRegistration(PCS_RDA_ROLE new_role, PCS_RDA_ROLE old_role);
+  void PromoteNode(const uint64_t cluster_size);
 
   std::set<NODE_ID> known_nodes_;
   PCS_RDA_ROLE role_;
-- 
2.14.1


------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to