Hi Gary,

ack, code review only. /Thanks HansN


On 08/22/2018 07:01 AM, Gary Lee wrote:
---
  src/fm/fmd/fmd.conf             |  4 ++++
  src/osaf/consensus/consensus.cc | 24 ++++++++++++++++--------
  src/osaf/consensus/consensus.h  |  4 ++--
  3 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/src/fm/fmd/fmd.conf b/src/fm/fmd/fmd.conf
index 9aff54970..9a106bf90 100644
--- a/src/fm/fmd/fmd.conf
+++ b/src/fm/fmd/fmd.conf
@@ -23,6 +23,10 @@ export FMS_NODE_ISOLATION_TIMEOUT=0
  # To enable split brain prevention, change to 1
  #export FMS_SPLIT_BRAIN_PREVENTION=0
+# Used with split brain prevention, this controls
+# the expiration time of takeover requests (unit is seconds)
+export FMS_TAKEOVER_REQUEST_VALID_TIME=20
+
  # Full path to key-value store plugin
  #export FMS_KEYVALUE_STORE_PLUGIN_CMD=
diff --git a/src/osaf/consensus/consensus.cc b/src/osaf/consensus/consensus.cc
index dc5c9bc46..1136c3724 100644
--- a/src/osaf/consensus/consensus.cc
+++ b/src/osaf/consensus/consensus.cc
@@ -229,6 +229,13 @@ Consensus::Consensus() {
    std::string kv_store_cmd = base::GetEnv("FMS_KEYVALUE_STORE_PLUGIN_CMD", "");
    uint32_t use_remote_fencing = base::GetEnv("FMS_USE_REMOTE_FENCING", 0);
+  // if not specified in fmd.conf,
+  // takeover requests are valid for 20 seconds
+  takeover_valid_time =
+    base::GetEnv("FMS_TAKEOVER_REQUEST_VALID_TIME", 20);
+  // expiration time of takeover request is twice the max wait time
+  max_takeover_retry = takeover_valid_time / 2;
+
    if (split_brain_enable == 1 && kv_store_cmd.empty() == false) {
      use_consensus_ = true;
    } else {
@@ -293,10 +300,11 @@ void Consensus::CheckForExistingTakeoverRequest() {
    LOG_NO("A takeover request is in progress");
    uint32_t retries = 0;
-  // wait up to approximately 10 seconds, or until the takeover request is gone
+  // wait up to max_takeover_retry seconds,
+  // or until the takeover request is gone
    rc = ReadTakeoverRequest(tokens);
    while (rc == SA_AIS_OK &&
-         retries < kMaxTakeoverRetry) {
+         retries < max_takeover_retry) {
      ++retries;
      TRACE("Takeover request still present");
      std::this_thread::sleep_for(kSleepInterval);
@@ -326,7 +334,7 @@ SaAisErrorT Consensus::CreateTakeoverRequest(const std::string& current_owner,
    SaAisErrorT rc;
    uint32_t retries = 0;
    rc = KeyValue::Create(kTakeoverRequestKeyname, takeover_request,
-                        kTakeoverValidTime);
+                        takeover_valid_time);
    while (rc == SA_AIS_ERR_FAILED_OPERATION && retries < kMaxRetry) {
      ++retries;
      std::this_thread::sleep_for(kSleepInterval);
@@ -339,11 +347,11 @@ SaAisErrorT Consensus::CreateTakeoverRequest(const std::string& current_owner,
      // retrieve takeover request
      std::vector<std::string> tokens;
      retries = 0;
-    // wait up to approximately 10 seconds, or until the takeover request is
-    // gone
+    // wait up to approximately max_takeover_retry seconds,
+    // or until the takeover request is gone
      rc = ReadTakeoverRequest(tokens);
      while (rc == SA_AIS_OK &&
-           retries < kMaxTakeoverRetry) {
+           retries < max_takeover_retry) {
        ++retries;
        TRACE("Takeover request still present");
        std::this_thread::sleep_for(kSleepInterval);
@@ -364,9 +372,9 @@ SaAisErrorT Consensus::CreateTakeoverRequest(const std::string& current_owner,
      return CreateTakeoverRequest(current_owner, proposed_owner, cluster_size);
    }
-  // wait up to 15s for request to be answered
+  // wait up to max_takeover_retry seconds for request to be answered
    retries = 0;
-  while (retries < (kMaxTakeoverRetry * 1.5)) {
+  while (retries < max_takeover_retry) {
      std::vector<std::string> tokens;
      if (ReadTakeoverRequest(tokens) == SA_AIS_OK) {
        const std::string state =
diff --git a/src/osaf/consensus/consensus.h b/src/osaf/consensus/consensus.h
index a606d9de1..6421c7ca7 100644
--- a/src/osaf/consensus/consensus.h
+++ b/src/osaf/consensus/consensus.h
@@ -85,13 +85,13 @@ class Consensus {
   private:
    bool use_consensus_ = false;
    bool use_remote_fencing_ = false;
+  uint32_t takeover_valid_time;
+  uint32_t max_takeover_retry;
    const std::string kTestKeyname = "opensaf_write_test";
    const std::chrono::milliseconds kSleepInterval =
        std::chrono::milliseconds(1000);  // in ms
    static constexpr uint32_t kLockTimeout = 0;  // lock is persistent by default
-  static constexpr uint32_t kMaxTakeoverRetry = 20;
    static constexpr uint32_t kMaxRetry = 30;
-  static constexpr uint32_t kTakeoverValidTime = 15;  // in seconds
    void CheckForExistingTakeoverRequest();


------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to