Currently, a takeover request is held back for 4 seconds before it is sent to the main thread, so that MDS messages related to topology changes are processed first.
If the plugin informs us it has lost connectivity to the consensus service by returning 'UNDEFINED', or we prioritise the current active SC, then we should not delay the request. This will speed up self-fencing of the current active node (if required). --- src/osaf/consensus/consensus.cc | 33 +++++++++++++++++++++++++++++++-- src/osaf/consensus/consensus.h | 8 +++++++- src/rde/rded/role.cc | 11 +++++++---- 3 files changed, 45 insertions(+), 7 deletions(-) diff --git a/src/osaf/consensus/consensus.cc b/src/osaf/consensus/consensus.cc index 9c98b77..878e3a7 100644 --- a/src/osaf/consensus/consensus.cc +++ b/src/osaf/consensus/consensus.cc @@ -202,6 +202,10 @@ bool Consensus::IsRelaxedNodePromotionEnabled() const { return relaxed_node_promotion_; } +bool Consensus::PrioritisePartitionSize() const { + return prioritise_partition_size_; +} + std::string Consensus::CurrentActive() const { TRACE_ENTER(); if (use_consensus_ == false) { @@ -464,7 +468,8 @@ SaAisErrorT Consensus::WriteTakeoverResult( } SaAisErrorT Consensus::ParseTakeoverRequest(const std::string& request, - std::vector<std::string>& tokens) { + std::vector<std::string>& tokens) + const { TRACE_ENTER(); if (request.empty() == true) { @@ -477,7 +482,7 @@ SaAisErrorT Consensus::ParseTakeoverRequest(const std::string& request, tokens.clear(); Split(request, tokens); if (tokens.size() != 4) { - LOG_ER("Invalid takeover request: '%s'", request.c_str()); + LOG_WA("Invalid takeover request: '%s'", request.c_str()); return SA_AIS_ERR_LIBRARY; } @@ -600,6 +605,30 @@ Consensus::TakeoverState Consensus::HandleTakeoverRequest( return result; } +// Determine if plugin is telling us to self-fence due to loss +// of connectivity to the KV store +bool Consensus::SelfFence(const std::string& request) const { + TRACE_ENTER(); + + bool fence = false; + SaAisErrorT rc; + std::vector<std::string> tokens; + + if (request.empty() == false) { + rc = ParseTakeoverRequest(request, tokens); + if (rc == SA_AIS_OK) { + const std::string 
state_str = + tokens[static_cast<std::uint8_t>(TakeoverElements::STATE)]; + + if (state_str == + TakeoverStateStr[static_cast<std::uint8_t>(TakeoverState::UNDEFINED)]) { + fence = true; + } + } + } + return fence; +} + // separate space delimited elements in a string void Consensus::Split(const std::string& str, std::vector<std::string>& tokens) const { diff --git a/src/osaf/consensus/consensus.h b/src/osaf/consensus/consensus.h index 2fbd3bd..c408a6f 100644 --- a/src/osaf/consensus/consensus.h +++ b/src/osaf/consensus/consensus.h @@ -60,6 +60,12 @@ class Consensus { // Is relaxed node promotion enabled? bool IsRelaxedNodePromotionEnabled() const; + bool PrioritisePartitionSize() const; + + // Determine if plugin is telling us to self-fence due to loss + // of connectivity to the KV store + bool SelfFence(const std::string& request) const; + Consensus(); virtual ~Consensus(); @@ -105,7 +111,7 @@ class Consensus { const uint64_t cluster_size); SaAisErrorT ParseTakeoverRequest(const std::string& request, - std::vector<std::string>& tokens); + std::vector<std::string>& tokens) const; SaAisErrorT ReadTakeoverRequest(std::vector<std::string>& tokens); SaAisErrorT WriteTakeoverResult(const std::string& current_owner, diff --git a/src/rde/rded/role.cc b/src/rde/rded/role.cc index a967bd5..5f107ed 100644 --- a/src/rde/rded/role.cc +++ b/src/rde/rded/role.cc @@ -52,6 +52,7 @@ void Role::MonitorCallback(const std::string& key, const std::string& new_value, rde_msg* msg = static_cast<rde_msg*>(malloc(sizeof(rde_msg))); if (key == Consensus::kTakeoverRequestKeyname) { std::string request; + Consensus consensus_service; if (new_value.empty() == true) { // sometimes the KV store plugin doesn't return the new value, @@ -62,7 +63,6 @@ void Role::MonitorCallback(const std::string& key, const std::string& new_value, SaAisErrorT rc = SA_AIS_ERR_TRY_AGAIN; constexpr uint8_t max_retry = 5; uint8_t retries = 0; - Consensus consensus_service; while (retries < max_retry && rc != 
SA_AIS_OK) { rc = consensus_service.ReadTakeoverRequest(request); @@ -73,15 +73,18 @@ void Role::MonitorCallback(const std::string& key, const std::string& new_value, request = new_value; } - // don't send this to the main thread straight away, as it will - // need some time to process topology changes. msg->type = RDE_MSG_TAKEOVER_REQUEST_CALLBACK; size_t len = request.length() + 1; msg->info.takeover_request = new char[len]; strncpy(msg->info.takeover_request, request.c_str(), len); LOG_NO("Sending takeover request '%s' to main thread", msg->info.takeover_request); - std::this_thread::sleep_for(std::chrono::seconds(4)); + if (consensus_service.SelfFence(request) == false && + consensus_service.PrioritisePartitionSize() == true) { + // don't send this to the main thread straight away, as it will + // need some time to process topology changes. + std::this_thread::sleep_for(std::chrono::seconds(4)); + } } else { msg->type = RDE_MSG_NEW_ACTIVE_CALLBACK; } -- 2.7.4 _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel