Add the FMS_TAKEOVER_PRIORITISE_PARTITION_SIZE option to allow the active SC to be preferred during a network split. By default the larger partition is preferred, which maintains the existing behaviour.
Add configuration support for FMS_RELAXED_NODE_PROMOTION. --- src/osaf/consensus/consensus.cc | 39 ++++++++++++++++++++++++++++++++++++--- src/osaf/consensus/consensus.h | 9 +++++++-- src/osaf/consensus/key_value.cc | 8 ++++++-- 3 files changed, 49 insertions(+), 7 deletions(-) diff --git a/src/osaf/consensus/consensus.cc b/src/osaf/consensus/consensus.cc index 112af7d..5304c4f 100644 --- a/src/osaf/consensus/consensus.cc +++ b/src/osaf/consensus/consensus.cc @@ -64,6 +64,7 @@ SaAisErrorT Consensus::PromoteThisNode(const bool graceful_takeover, cluster_size); if (rc != SA_AIS_OK) { LOG_WA("Takeover request failed (%d)", rc); + rc = SA_AIS_ERR_EXIST; return rc; } take_over_request_created = true; @@ -99,7 +100,7 @@ SaAisErrorT Consensus::PromoteThisNode(const bool graceful_takeover, if (rc == SA_AIS_OK) { LOG_NO("Active controller set to %s", base::Conf::NodeName().c_str()); } else { - LOG_ER("Failed to promote this node (%u)", rc); + LOG_WA("Failed to promote this node (%u)", rc); } return rc; @@ -197,6 +198,10 @@ bool Consensus::IsWritable() const { bool Consensus::IsRemoteFencingEnabled() const { return use_remote_fencing_; } +bool Consensus::IsRelaxedNodePromotionEnabled() const { + return relaxed_node_promotion_; +} + std::string Consensus::CurrentActive() const { TRACE_ENTER(); if (use_consensus_ == false) { @@ -228,6 +233,10 @@ Consensus::Consensus() { uint32_t split_brain_enable = base::GetEnv("FMS_SPLIT_BRAIN_PREVENTION", 0); std::string kv_store_cmd = base::GetEnv("FMS_KEYVALUE_STORE_PLUGIN_CMD", ""); uint32_t use_remote_fencing = base::GetEnv("FMS_USE_REMOTE_FENCING", 0); + uint32_t prioritise_partition_size = + base::GetEnv("FMS_TAKEOVER_PRIORITISE_PARTITION_SIZE", 1); + uint32_t relaxed_node_promotion = + base::GetEnv("FMS_RELAXED_NODE_PROMOTION", 0); // if not specified in fmd.conf, // takeover requests are valid for 20 seconds @@ -246,6 +255,14 @@ Consensus::Consensus() { use_remote_fencing_ = true; } + if (prioritise_partition_size == 1) { + 
prioritise_partition_size_ = true; + } + + if (use_consensus_ == true && relaxed_node_promotion == 1) { + relaxed_node_promotion_ = true; + } + // needed for base::Conf::NodeName() later base::Conf::InitNodeName(); } @@ -373,6 +390,10 @@ SaAisErrorT Consensus::CreateTakeoverRequest(const std::string& current_owner, return CreateTakeoverRequest(current_owner, proposed_owner, cluster_size); } + if (rc != SA_AIS_OK) { + return rc; + } + // wait up to max_takeover_retry seconds for request to be answered retries = 0; while (retries < max_takeover_retry) { @@ -546,9 +567,21 @@ Consensus::TakeoverState Consensus::HandleTakeoverRequest( LOG_NO("Other network size: %" PRIu64 ", our network size: %" PRIu64, proposed_cluster_size, cluster_size); + const std::string state_str = + tokens[static_cast<std::uint8_t>(TakeoverElements::STATE)]; + TakeoverState result; - if (proposed_cluster_size > cluster_size) { - result = TakeoverState::ACCEPTED; + if (state_str != + TakeoverStateStr[static_cast<std::uint8_t>(TakeoverState::NEW)]) { + return TakeoverState::UNDEFINED; + } + + if (prioritise_partition_size_ == true) { + if (proposed_cluster_size > cluster_size) { + result = TakeoverState::ACCEPTED; + } else { + result = TakeoverState::REJECTED; + } } else { result = TakeoverState::REJECTED; } diff --git a/src/osaf/consensus/consensus.h b/src/osaf/consensus/consensus.h index 6421c7c..2fbd3bd 100644 --- a/src/osaf/consensus/consensus.h +++ b/src/osaf/consensus/consensus.h @@ -57,6 +57,9 @@ class Consensus { // Is remote fencing enabled? bool IsRemoteFencingEnabled() const; + // Is relaxed node promotion enabled? 
+ bool IsRelaxedNodePromotionEnabled() const; + Consensus(); virtual ~Consensus(); @@ -66,7 +69,7 @@ class Consensus { UNDEFINED = 0, NEW = 1, ACCEPTED = 2, - REJECTED = 3, + REJECTED = 3 }; enum class TakeoverElements : std::uint8_t { @@ -85,13 +88,15 @@ class Consensus { private: bool use_consensus_ = false; bool use_remote_fencing_ = false; + bool prioritise_partition_size_ = false; + bool relaxed_node_promotion_ = false; uint32_t takeover_valid_time; uint32_t max_takeover_retry; const std::string kTestKeyname = "opensaf_write_test"; const std::chrono::milliseconds kSleepInterval = std::chrono::milliseconds(1000); // in ms static constexpr uint32_t kLockTimeout = 0; // lock is persistent by default - static constexpr uint32_t kMaxRetry = 30; + static constexpr uint32_t kMaxRetry = 3; void CheckForExistingTakeoverRequest(); diff --git a/src/osaf/consensus/key_value.cc b/src/osaf/consensus/key_value.cc index 109cf9f..73a0e70 100644 --- a/src/osaf/consensus/key_value.cc +++ b/src/osaf/consensus/key_value.cc @@ -53,6 +53,8 @@ SaAisErrorT KeyValue::Get(const std::string& key, std::string& value) { if (rc == 0) { return SA_AIS_OK; + } else if (rc == 1) { + return SA_AIS_ERR_INVALID_PARAM; } else { return SA_AIS_ERR_FAILED_OPERATION; } @@ -109,6 +111,8 @@ SaAisErrorT KeyValue::Create(const std::string& key, const std::string& value, return SA_AIS_OK; } else if (rc == 1) { return SA_AIS_ERR_EXIST; + } else if (rc == 2) { + return SA_AIS_ERR_INVALID_PARAM; } else { return SA_AIS_ERR_FAILED_OPERATION; } @@ -211,7 +215,7 @@ void WatchKeyFunction(const std::string& key, const ConsensusCallback& callback, int rc; rc = KeyValue::Execute(command, value); - while (rc != 0 && retries < kMaxRetry) { + while (rc != 0 && rc < 126 && retries < kMaxRetry) { ++retries; std::this_thread::sleep_for(kSleepInterval); rc = KeyValue::Execute(command, value); @@ -238,7 +242,7 @@ void WatchLockFunction(const ConsensusCallback& callback, int rc; rc = KeyValue::Execute(command, value); - while 
(rc != 0 && retries < kMaxRetry) { + while (rc != 0 && rc < 126 && retries < kMaxRetry) { ++retries; std::this_thread::sleep_for(kSleepInterval); rc = KeyValue::Execute(command, value); -- 2.7.4 _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel