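Currently, the consensus code relating to node promotion is run from the main thread, where it can block rded's event handling. We can improve rded's responsiveness by moving this code into another thread. On success, the new thread posts an RDE_MSG_ACTIVE_PROMOTION_SUCCESS message to the main thread's mailbox, and the main thread then completes the switch to ACTIVE in Role::NodePromoted().
---
 src/rde/rded/rde_cb.h    |  3 +-
 src/rde/rded/rde_main.cc |  6 +++-
 src/rde/rded/role.cc     | 82 ++++++++++++++++++++++++++++++------------------
 src/rde/rded/role.h      |  2 ++
 4 files changed, 61 insertions(+), 32 deletions(-)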
diff --git a/src/rde/rded/rde_cb.h b/src/rde/rded/rde_cb.h index f5ad689c3..877687341 100644 --- a/src/rde/rded/rde_cb.h +++ b/src/rde/rded/rde_cb.h @@ -53,7 +53,8 @@ enum RDE_MSG_TYPE { RDE_MSG_NEW_ACTIVE_CALLBACK = 5, RDE_MSG_NODE_UP = 6, RDE_MSG_NODE_DOWN = 7, - RDE_MSG_TAKEOVER_REQUEST_CALLBACK = 8 + RDE_MSG_TAKEOVER_REQUEST_CALLBACK = 8, + RDE_MSG_ACTIVE_PROMOTION_SUCCESS = 9 }; struct rde_peer_info { diff --git a/src/rde/rded/rde_main.cc b/src/rde/rded/rde_main.cc index c5b4b8283..c59aa4536 100644 --- a/src/rde/rded/rde_main.cc +++ b/src/rde/rded/rde_main.cc @@ -55,7 +55,8 @@ const char *rde_msg_name[] = {"-", "RDE_MSG_NEW_ACTIVE_CALLBACK(5)" "RDE_MSG_NODE_UP(6)", "RDE_MSG_NODE_DOWN(7)", - "RDE_MSG_TAKEOVER_REQUEST_CALLBACK(8)"}; + "RDE_MSG_TAKEOVER_REQUEST_CALLBACK(8)", + "RDE_MSG_ACTIVE_PROMOTION_SUCCESS(9)"}; static RDE_CONTROL_BLOCK _rde_cb; static RDE_CONTROL_BLOCK *rde_cb = &_rde_cb; @@ -186,6 +187,9 @@ static void handle_mbx_event() { LOG_WA("Received takeover request when not active"); } } break; + case RDE_MSG_ACTIVE_PROMOTION_SUCCESS: + role->NodePromoted(); + break; default: LOG_ER("%s: discarding unknown message type %u", __FUNCTION__, msg->type); break; diff --git a/src/rde/rded/role.cc b/src/rde/rded/role.cc index 1b5a6ae89..b6a5df51a 100644 --- a/src/rde/rded/role.cc +++ b/src/rde/rded/role.cc @@ -22,6 +22,7 @@ #include "rde/rded/role.h" #include <cinttypes> #include <cstdint> +#include <thread> #include "base/getenv.h" #include "base/logtrace.h" #include "base/ncs_main_papi.h" @@ -63,6 +64,55 @@ void Role::MonitorCallback(const std::string& key, const std::string& new_value, osafassert(status == NCSCC_RC_SUCCESS); } +void Role::PromoteNode(const uint64_t cluster_size) { + TRACE_ENTER(); + SaAisErrorT rc; + + Consensus consensus_service; + + rc = consensus_service.PromoteThisNode(true, cluster_size); + if (rc != SA_AIS_OK && rc != SA_AIS_ERR_EXIST) { + LOG_ER("Unable to set active controller in consensus service"); + opensaf_reboot(0, nullptr, 
+ "Unable to set active controller in consensus service"); + } + + if (rc == SA_AIS_ERR_EXIST) { + LOG_WA("Another controller is already active"); + return; + } + + RDE_CONTROL_BLOCK* cb = rde_get_control_block(); + + // send msg to main thread + rde_msg* msg = static_cast<rde_msg*>(malloc(sizeof(rde_msg))); + msg->type = RDE_MSG_ACTIVE_PROMOTION_SUCCESS; + uint32_t status; + status = m_NCS_IPC_SEND(&cb->mbx, msg, NCS_IPC_PRIORITY_HIGH); + osafassert(status == NCSCC_RC_SUCCESS); +} + +void Role::NodePromoted() { + ExecutePreActiveScript(); + LOG_NO("Switched to ACTIVE from %s", to_string(role())); + role_ = PCS_RDA_ACTIVE; + rde_rda_send_role(role_); + + Consensus consensus_service; + RDE_CONTROL_BLOCK* cb = rde_get_control_block(); + + // register for callback if active controller is changed + // in consensus service + if (cb->monitor_lock_thread_running == false) { + cb->monitor_lock_thread_running = true; + consensus_service.MonitorLock(MonitorCallback, cb->mbx); + } + if (cb->monitor_takeover_req_thread_running == false) { + cb->monitor_takeover_req_thread_running = true; + consensus_service.MonitorTakeoverRequest(MonitorCallback, cb->mbx); + } +} + Role::Role(NODE_ID own_node_id) : known_nodes_{}, role_{PCS_RDA_QUIESCED}, @@ -83,36 +133,8 @@ timespec* Role::Poll(timespec* ts) { timeout = ts; } else { RDE_CONTROL_BLOCK* cb = rde_get_control_block(); - SaAisErrorT rc; - Consensus consensus_service; - - rc = consensus_service.PromoteThisNode(true, cb->cluster_members.size()); - if (rc != SA_AIS_OK && rc != SA_AIS_ERR_EXIST) { - LOG_ER("Unable to set active controller in consensus service"); - opensaf_reboot(0, nullptr, - "Unable to set active controller in consensus service"); - } - - if (rc == SA_AIS_ERR_EXIST) { - LOG_WA("Another controller is already active"); - return timeout; - } - - ExecutePreActiveScript(); - LOG_NO("Switched to ACTIVE from %s", to_string(role())); - role_ = PCS_RDA_ACTIVE; - rde_rda_send_role(role_); - - // register for callback if active 
controller is changed - // in consensus service - if (cb->monitor_lock_thread_running == false) { - cb->monitor_lock_thread_running = true; - consensus_service.MonitorLock(MonitorCallback, cb->mbx); - } - if (cb->monitor_takeover_req_thread_running == false) { - cb->monitor_takeover_req_thread_running = true; - consensus_service.MonitorTakeoverRequest(MonitorCallback, cb->mbx); - } + std::thread(&Role::PromoteNode, + this, cb->cluster_members.size()).detach(); } } return timeout; diff --git a/src/rde/rded/role.h b/src/rde/rded/role.h index 59a850988..9780debd0 100644 --- a/src/rde/rded/role.h +++ b/src/rde/rded/role.h @@ -41,6 +41,7 @@ class Role { static const char* to_string(PCS_RDA_ROLE role); static void MonitorCallback(const std::string& key, const std::string& new_value, SYSF_MBX mbx); + void NodePromoted(); private: static const uint64_t kDefaultDiscoverPeerTimeout = 2000; @@ -48,6 +49,7 @@ class Role { void ExecutePreActiveScript(); void ResetElectionTimer(); uint32_t UpdateMdsRegistration(PCS_RDA_ROLE new_role, PCS_RDA_ROLE old_role); + void PromoteNode(const uint64_t cluster_size); std::set<NODE_ID> known_nodes_; PCS_RDA_ROLE role_; -- 2.14.1 ------------------------------------------------------------------------------ Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel