Hi Gary,
Ack, code review only. /Thanks, HansN
On 08/22/2018 07:01 AM, Gary Lee wrote:
---
src/fm/fmd/fmd.conf | 4 ++++
src/osaf/consensus/consensus.cc | 24 ++++++++++++++++--------
src/osaf/consensus/consensus.h | 4 ++--
3 files changed, 22 insertions(+), 10 deletions(-)
diff --git a/src/fm/fmd/fmd.conf b/src/fm/fmd/fmd.conf
index 9aff54970..9a106bf90 100644
--- a/src/fm/fmd/fmd.conf
+++ b/src/fm/fmd/fmd.conf
@@ -23,6 +23,10 @@ export FMS_NODE_ISOLATION_TIMEOUT=0
# To enable split brain prevention, change to 1
#export FMS_SPLIT_BRAIN_PREVENTION=0
+# Used with split brain prevention, this controls
+# the expiration time of takeover requests (unit is seconds)
+export FMS_TAKEOVER_REQUEST_VALID_TIME=20
+
# Full path to key-value store plugin
#export FMS_KEYVALUE_STORE_PLUGIN_CMD=
diff --git a/src/osaf/consensus/consensus.cc b/src/osaf/consensus/consensus.cc
index dc5c9bc46..1136c3724 100644
--- a/src/osaf/consensus/consensus.cc
+++ b/src/osaf/consensus/consensus.cc
@@ -229,6 +229,13 @@ Consensus::Consensus() {
std::string kv_store_cmd = base::GetEnv("FMS_KEYVALUE_STORE_PLUGIN_CMD",
"");
uint32_t use_remote_fencing = base::GetEnv("FMS_USE_REMOTE_FENCING", 0);
+ // if not specified in fmd.conf,
+ // takeover requests are valid for 20 seconds
+ takeover_valid_time =
+ base::GetEnv("FMS_TAKEOVER_REQUEST_VALID_TIME", 20);
+ // expiration time of takeover request is twice the max wait time
+ max_takeover_retry = takeover_valid_time / 2;
+
if (split_brain_enable == 1 && kv_store_cmd.empty() == false) {
use_consensus_ = true;
} else {
@@ -293,10 +300,11 @@ void Consensus::CheckForExistingTakeoverRequest() {
LOG_NO("A takeover request is in progress");
uint32_t retries = 0;
- // wait up to approximately 10 seconds, or until the takeover request is gone
+ // wait up to max_takeover_retry seconds,
+ // or until the takeover request is gone
rc = ReadTakeoverRequest(tokens);
while (rc == SA_AIS_OK &&
- retries < kMaxTakeoverRetry) {
+ retries < max_takeover_retry) {
++retries;
TRACE("Takeover request still present");
std::this_thread::sleep_for(kSleepInterval);
@@ -326,7 +334,7 @@ SaAisErrorT Consensus::CreateTakeoverRequest(const std::string& current_owner,
SaAisErrorT rc;
uint32_t retries = 0;
rc = KeyValue::Create(kTakeoverRequestKeyname, takeover_request,
- kTakeoverValidTime);
+ takeover_valid_time);
while (rc == SA_AIS_ERR_FAILED_OPERATION && retries < kMaxRetry) {
++retries;
std::this_thread::sleep_for(kSleepInterval);
@@ -339,11 +347,11 @@ SaAisErrorT Consensus::CreateTakeoverRequest(const std::string& current_owner,
// retrieve takeover request
std::vector<std::string> tokens;
retries = 0;
- // wait up to approximately 10 seconds, or until the takeover request is
- // gone
+ // wait up to approximately max_takeover_retry seconds,
+ // or until the takeover request is gone
rc = ReadTakeoverRequest(tokens);
while (rc == SA_AIS_OK &&
- retries < kMaxTakeoverRetry) {
+ retries < max_takeover_retry) {
++retries;
TRACE("Takeover request still present");
std::this_thread::sleep_for(kSleepInterval);
@@ -364,9 +372,9 @@ SaAisErrorT Consensus::CreateTakeoverRequest(const std::string& current_owner,
return CreateTakeoverRequest(current_owner, proposed_owner, cluster_size);
}
- // wait up to 15s for request to be answered
+ // wait up to max_takeover_retry seconds for request to be answered
retries = 0;
- while (retries < (kMaxTakeoverRetry * 1.5)) {
+ while (retries < max_takeover_retry) {
std::vector<std::string> tokens;
if (ReadTakeoverRequest(tokens) == SA_AIS_OK) {
const std::string state =
diff --git a/src/osaf/consensus/consensus.h b/src/osaf/consensus/consensus.h
index a606d9de1..6421c7ca7 100644
--- a/src/osaf/consensus/consensus.h
+++ b/src/osaf/consensus/consensus.h
@@ -85,13 +85,13 @@ class Consensus {
private:
bool use_consensus_ = false;
bool use_remote_fencing_ = false;
+ uint32_t takeover_valid_time;
+ uint32_t max_takeover_retry;
const std::string kTestKeyname = "opensaf_write_test";
const std::chrono::milliseconds kSleepInterval =
std::chrono::milliseconds(1000); // in ms
static constexpr uint32_t kLockTimeout = 0; // lock is persistent by default
- static constexpr uint32_t kMaxTakeoverRetry = 20;
static constexpr uint32_t kMaxRetry = 30;
- static constexpr uint32_t kTakeoverValidTime = 15; // in seconds
void CheckForExistingTakeoverRequest();
------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel