Currently, the consensus code relating to node promotion
is run from the main thread. We can improve rded's
responsiveness by moving this code into another thread.
---
src/rde/rded/rde_cb.h | 3 +-
src/rde/rded/rde_main.cc | 6 +++-
src/rde/rded/role.cc | 82 ++++++++++++++++++++++++++++++------------------
src/rde/rded/role.h | 2 ++
4 files changed, 61 insertions(+), 32 deletions(-)
diff --git a/src/rde/rded/rde_cb.h b/src/rde/rded/rde_cb.h
index f5ad689c3..877687341 100644
--- a/src/rde/rded/rde_cb.h
+++ b/src/rde/rded/rde_cb.h
@@ -53,7 +53,8 @@ enum RDE_MSG_TYPE {
RDE_MSG_NEW_ACTIVE_CALLBACK = 5,
RDE_MSG_NODE_UP = 6,
RDE_MSG_NODE_DOWN = 7,
- RDE_MSG_TAKEOVER_REQUEST_CALLBACK = 8
+ RDE_MSG_TAKEOVER_REQUEST_CALLBACK = 8,
+ RDE_MSG_ACTIVE_PROMOTION_SUCCESS = 9
};
struct rde_peer_info {
diff --git a/src/rde/rded/rde_main.cc b/src/rde/rded/rde_main.cc
index c5b4b8283..c59aa4536 100644
--- a/src/rde/rded/rde_main.cc
+++ b/src/rde/rded/rde_main.cc
@@ -55,7 +55,8 @@ const char *rde_msg_name[] = {"-",
"RDE_MSG_NEW_ACTIVE_CALLBACK(5)"
"RDE_MSG_NODE_UP(6)",
"RDE_MSG_NODE_DOWN(7)",
- "RDE_MSG_TAKEOVER_REQUEST_CALLBACK(8)"};
+ "RDE_MSG_TAKEOVER_REQUEST_CALLBACK(8)",
+ "RDE_MSG_ACTIVE_PROMOTION_SUCCESS(9)"};
static RDE_CONTROL_BLOCK _rde_cb;
static RDE_CONTROL_BLOCK *rde_cb = &_rde_cb;
@@ -186,6 +187,9 @@ static void handle_mbx_event() {
LOG_WA("Received takeover request when not active");
}
} break;
+ case RDE_MSG_ACTIVE_PROMOTION_SUCCESS:
+ role->NodePromoted();
+ break;
default:
LOG_ER("%s: discarding unknown message type %u", __FUNCTION__,
msg->type);
break;
diff --git a/src/rde/rded/role.cc b/src/rde/rded/role.cc
index 1b5a6ae89..b6a5df51a 100644
--- a/src/rde/rded/role.cc
+++ b/src/rde/rded/role.cc
@@ -22,6 +22,7 @@
#include "rde/rded/role.h"
#include <cinttypes>
#include <cstdint>
+#include <thread>
#include "base/getenv.h"
#include "base/logtrace.h"
#include "base/ncs_main_papi.h"
@@ -63,6 +64,55 @@ void Role::MonitorCallback(const std::string& key, const
std::string& new_value,
osafassert(status == NCSCC_RC_SUCCESS);
}
+void Role::PromoteNode(const uint64_t cluster_size) {
+ TRACE_ENTER();
+ SaAisErrorT rc;
+
+ Consensus consensus_service;
+
+ rc = consensus_service.PromoteThisNode(true, cluster_size);
+ if (rc != SA_AIS_OK && rc != SA_AIS_ERR_EXIST) {
+ LOG_ER("Unable to set active controller in consensus service");
+ opensaf_reboot(0, nullptr,
+ "Unable to set active controller in consensus service");
+ }
+
+ if (rc == SA_AIS_ERR_EXIST) {
+ LOG_WA("Another controller is already active");
+ return;
+ }
+
+ RDE_CONTROL_BLOCK* cb = rde_get_control_block();
+
+ // send msg to main thread
+ rde_msg* msg = static_cast<rde_msg*>(malloc(sizeof(rde_msg)));
+ msg->type = RDE_MSG_ACTIVE_PROMOTION_SUCCESS;
+ uint32_t status;
+ status = m_NCS_IPC_SEND(&cb->mbx, msg, NCS_IPC_PRIORITY_HIGH);
+ osafassert(status == NCSCC_RC_SUCCESS);
+}
+
+void Role::NodePromoted() {
+ ExecutePreActiveScript();
+ LOG_NO("Switched to ACTIVE from %s", to_string(role()));
+ role_ = PCS_RDA_ACTIVE;
+ rde_rda_send_role(role_);
+
+ Consensus consensus_service;
+ RDE_CONTROL_BLOCK* cb = rde_get_control_block();
+
+ // register for callback if active controller is changed
+ // in consensus service
+ if (cb->monitor_lock_thread_running == false) {
+ cb->monitor_lock_thread_running = true;
+ consensus_service.MonitorLock(MonitorCallback, cb->mbx);
+ }
+ if (cb->monitor_takeover_req_thread_running == false) {
+ cb->monitor_takeover_req_thread_running = true;
+ consensus_service.MonitorTakeoverRequest(MonitorCallback, cb->mbx);
+ }
+}
+
Role::Role(NODE_ID own_node_id)
: known_nodes_{},
role_{PCS_RDA_QUIESCED},
@@ -83,36 +133,8 @@ timespec* Role::Poll(timespec* ts) {
timeout = ts;
} else {
RDE_CONTROL_BLOCK* cb = rde_get_control_block();
- SaAisErrorT rc;
- Consensus consensus_service;
-
- rc = consensus_service.PromoteThisNode(true, cb->cluster_members.size());
- if (rc != SA_AIS_OK && rc != SA_AIS_ERR_EXIST) {
- LOG_ER("Unable to set active controller in consensus service");
- opensaf_reboot(0, nullptr,
- "Unable to set active controller in consensus service");
- }
-
- if (rc == SA_AIS_ERR_EXIST) {
- LOG_WA("Another controller is already active");
- return timeout;
- }
-
- ExecutePreActiveScript();
- LOG_NO("Switched to ACTIVE from %s", to_string(role()));
- role_ = PCS_RDA_ACTIVE;
- rde_rda_send_role(role_);
-
- // register for callback if active controller is changed
- // in consensus service
- if (cb->monitor_lock_thread_running == false) {
- cb->monitor_lock_thread_running = true;
- consensus_service.MonitorLock(MonitorCallback, cb->mbx);
- }
- if (cb->monitor_takeover_req_thread_running == false) {
- cb->monitor_takeover_req_thread_running = true;
- consensus_service.MonitorTakeoverRequest(MonitorCallback, cb->mbx);
- }
+ std::thread(&Role::PromoteNode,
+ this, cb->cluster_members.size()).detach();
}
}
return timeout;
diff --git a/src/rde/rded/role.h b/src/rde/rded/role.h
index 59a850988..9780debd0 100644
--- a/src/rde/rded/role.h
+++ b/src/rde/rded/role.h
@@ -41,6 +41,7 @@ class Role {
static const char* to_string(PCS_RDA_ROLE role);
static void MonitorCallback(const std::string& key,
const std::string& new_value, SYSF_MBX mbx);
+ void NodePromoted();
private:
static const uint64_t kDefaultDiscoverPeerTimeout = 2000;
@@ -48,6 +49,7 @@ class Role {
void ExecutePreActiveScript();
void ResetElectionTimer();
uint32_t UpdateMdsRegistration(PCS_RDA_ROLE new_role, PCS_RDA_ROLE old_role);
+ void PromoteNode(const uint64_t cluster_size);
std::set<NODE_ID> known_nodes_;
PCS_RDA_ROLE role_;
--
2.14.1
------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel