---
 src/fm/fmd/fmd.conf             |  4 ++++
 src/osaf/consensus/consensus.cc | 24 ++++++++++++++++--------
 src/osaf/consensus/consensus.h  |  4 ++--
 3 files changed, 22 insertions(+), 10 deletions(-)
diff --git a/src/fm/fmd/fmd.conf b/src/fm/fmd/fmd.conf index 9aff54970..9a106bf90 100644 --- a/src/fm/fmd/fmd.conf +++ b/src/fm/fmd/fmd.conf @@ -23,6 +23,10 @@ export FMS_NODE_ISOLATION_TIMEOUT=0 # To enable split brain prevention, change to 1 #export FMS_SPLIT_BRAIN_PREVENTION=0 +# Used with split brain prevention, this controls +# the expiration time of takeover requests (unit is seconds) +export FMS_TAKEOVER_REQUEST_VALID_TIME=20 + # Full path to key-value store plugin #export FMS_KEYVALUE_STORE_PLUGIN_CMD= diff --git a/src/osaf/consensus/consensus.cc b/src/osaf/consensus/consensus.cc index dc5c9bc46..1136c3724 100644 --- a/src/osaf/consensus/consensus.cc +++ b/src/osaf/consensus/consensus.cc @@ -229,6 +229,13 @@ Consensus::Consensus() { std::string kv_store_cmd = base::GetEnv("FMS_KEYVALUE_STORE_PLUGIN_CMD", ""); uint32_t use_remote_fencing = base::GetEnv("FMS_USE_REMOTE_FENCING", 0); + // if not specified in fmd.conf, + // takeover requests are valid for 20 seconds + takeover_valid_time = + base::GetEnv("FMS_TAKEOVER_REQUEST_VALID_TIME", 20); + // expiration time of takeover request is twice the max wait time + max_takeover_retry = takeover_valid_time / 2; + if (split_brain_enable == 1 && kv_store_cmd.empty() == false) { use_consensus_ = true; } else { @@ -293,10 +300,11 @@ void Consensus::CheckForExistingTakeoverRequest() { LOG_NO("A takeover request is in progress"); uint32_t retries = 0; - // wait up to approximately 10 seconds, or until the takeover request is gone + // wait up to max_takeover_retry seconds, + // or until the takeover request is gone rc = ReadTakeoverRequest(tokens); while (rc == SA_AIS_OK && - retries < kMaxTakeoverRetry) { + retries < max_takeover_retry) { ++retries; TRACE("Takeover request still present"); std::this_thread::sleep_for(kSleepInterval); @@ -326,7 +334,7 @@ SaAisErrorT Consensus::CreateTakeoverRequest(const std::string& current_owner, SaAisErrorT rc; uint32_t retries = 0; rc = KeyValue::Create(kTakeoverRequestKeyname, 
takeover_request, - kTakeoverValidTime); + takeover_valid_time); while (rc == SA_AIS_ERR_FAILED_OPERATION && retries < kMaxRetry) { ++retries; std::this_thread::sleep_for(kSleepInterval); @@ -339,11 +347,11 @@ SaAisErrorT Consensus::CreateTakeoverRequest(const std::string& current_owner, // retrieve takeover request std::vector<std::string> tokens; retries = 0; - // wait up to approximately 10 seconds, or until the takeover request is - // gone + // wait up to approximately max_takeover_retry seconds, + // or until the takeover request is gone rc = ReadTakeoverRequest(tokens); while (rc == SA_AIS_OK && - retries < kMaxTakeoverRetry) { + retries < max_takeover_retry) { ++retries; TRACE("Takeover request still present"); std::this_thread::sleep_for(kSleepInterval); @@ -364,9 +372,9 @@ SaAisErrorT Consensus::CreateTakeoverRequest(const std::string& current_owner, return CreateTakeoverRequest(current_owner, proposed_owner, cluster_size); } - // wait up to 15s for request to be answered + // wait up to max_takeover_retry seconds for request to be answered retries = 0; - while (retries < (kMaxTakeoverRetry * 1.5)) { + while (retries < max_takeover_retry) { std::vector<std::string> tokens; if (ReadTakeoverRequest(tokens) == SA_AIS_OK) { const std::string state = diff --git a/src/osaf/consensus/consensus.h b/src/osaf/consensus/consensus.h index a606d9de1..6421c7ca7 100644 --- a/src/osaf/consensus/consensus.h +++ b/src/osaf/consensus/consensus.h @@ -85,13 +85,13 @@ class Consensus { private: bool use_consensus_ = false; bool use_remote_fencing_ = false; + uint32_t takeover_valid_time; + uint32_t max_takeover_retry; const std::string kTestKeyname = "opensaf_write_test"; const std::chrono::milliseconds kSleepInterval = std::chrono::milliseconds(1000); // in ms static constexpr uint32_t kLockTimeout = 0; // lock is persistent by default - static constexpr uint32_t kMaxTakeoverRetry = 20; static constexpr uint32_t kMaxRetry = 30; - static constexpr uint32_t kTakeoverValidTime = 
15; // in seconds void CheckForExistingTakeoverRequest(); -- 2.17.1 ------------------------------------------------------------------------------ Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel