ack, review only. Thanks/Minh

On 21/1/19 2:52 pm, Gary Lee wrote:
Add the FMS_TAKEOVER_PRIORITISE_PARTITION_SIZE option to allow the
active SC to be preferred during a network split. By default the
larger partition is still preferred, which preserves the existing
behaviour.

Add configuration support for FMS_RELAXED_NODE_PROMOTION.
---
  src/osaf/consensus/consensus.cc | 39 ++++++++++++++++++++++++++++++++++++---
  src/osaf/consensus/consensus.h  |  9 +++++++--
  src/osaf/consensus/key_value.cc |  8 ++++++--
  3 files changed, 49 insertions(+), 7 deletions(-)

diff --git a/src/osaf/consensus/consensus.cc b/src/osaf/consensus/consensus.cc
index 112af7d..5304c4f 100644
--- a/src/osaf/consensus/consensus.cc
+++ b/src/osaf/consensus/consensus.cc
@@ -64,6 +64,7 @@ SaAisErrorT Consensus::PromoteThisNode(const bool 
graceful_takeover,
                                     cluster_size);
          if (rc != SA_AIS_OK) {
            LOG_WA("Takeover request failed (%d)", rc);
+          rc = SA_AIS_ERR_EXIST;
            return rc;
          }
          take_over_request_created = true;
@@ -99,7 +100,7 @@ SaAisErrorT Consensus::PromoteThisNode(const bool 
graceful_takeover,
    if (rc == SA_AIS_OK) {
      LOG_NO("Active controller set to %s", base::Conf::NodeName().c_str());
    } else {
-    LOG_ER("Failed to promote this node (%u)", rc);
+    LOG_WA("Failed to promote this node (%u)", rc);
    }
return rc;
@@ -197,6 +198,10 @@ bool Consensus::IsWritable() const {
bool Consensus::IsRemoteFencingEnabled() const { return use_remote_fencing_; }
+bool Consensus::IsRelaxedNodePromotionEnabled() const {
+  return relaxed_node_promotion_;
+}
+
  std::string Consensus::CurrentActive() const {
    TRACE_ENTER();
    if (use_consensus_ == false) {
@@ -228,6 +233,10 @@ Consensus::Consensus() {
    uint32_t split_brain_enable = base::GetEnv("FMS_SPLIT_BRAIN_PREVENTION", 0);
    std::string kv_store_cmd = base::GetEnv("FMS_KEYVALUE_STORE_PLUGIN_CMD", "");
    uint32_t use_remote_fencing = base::GetEnv("FMS_USE_REMOTE_FENCING", 0);
+  uint32_t prioritise_partition_size =
+    base::GetEnv("FMS_TAKEOVER_PRIORITISE_PARTITION_SIZE", 1);
+  uint32_t relaxed_node_promotion =
+    base::GetEnv("FMS_RELAXED_NODE_PROMOTION", 0);
// if not specified in fmd.conf,
    // takeover requests are valid for 20 seconds
@@ -246,6 +255,14 @@ Consensus::Consensus() {
      use_remote_fencing_ = true;
    }
+ if (prioritise_partition_size == 1) {
+    prioritise_partition_size_ = true;
+  }
+
+  if (use_consensus_ == true && relaxed_node_promotion == 1) {
+    relaxed_node_promotion_ = true;
+  }
+
    // needed for base::Conf::NodeName() later
    base::Conf::InitNodeName();
  }
@@ -373,6 +390,10 @@ SaAisErrorT Consensus::CreateTakeoverRequest(const 
std::string& current_owner,
      return CreateTakeoverRequest(current_owner, proposed_owner, cluster_size);
    }
+ if (rc != SA_AIS_OK) {
+     return rc;
+  }
+
    // wait up to max_takeover_retry seconds for request to be answered
    retries = 0;
    while (retries < max_takeover_retry) {
@@ -546,9 +567,21 @@ Consensus::TakeoverState Consensus::HandleTakeoverRequest(
    LOG_NO("Other network size: %" PRIu64 ", our network size: %" PRIu64,
           proposed_cluster_size, cluster_size);
+ const std::string state_str =
+    tokens[static_cast<std::uint8_t>(TakeoverElements::STATE)];
+
    TakeoverState result;
-  if (proposed_cluster_size > cluster_size) {
-    result = TakeoverState::ACCEPTED;
+  if (state_str !=
+        TakeoverStateStr[static_cast<std::uint8_t>(TakeoverState::NEW)]) {
+    return TakeoverState::UNDEFINED;
+  }
+
+  if (prioritise_partition_size_ == true) {
+    if (proposed_cluster_size > cluster_size) {
+      result = TakeoverState::ACCEPTED;
+    } else {
+      result = TakeoverState::REJECTED;
+    }
    } else {
      result = TakeoverState::REJECTED;
    }
diff --git a/src/osaf/consensus/consensus.h b/src/osaf/consensus/consensus.h
index 6421c7c..2fbd3bd 100644
--- a/src/osaf/consensus/consensus.h
+++ b/src/osaf/consensus/consensus.h
@@ -57,6 +57,9 @@ class Consensus {
    // Is remote fencing enabled?
    bool IsRemoteFencingEnabled() const;
+ // Is relaxed node promotion enabled?
+  bool IsRelaxedNodePromotionEnabled() const;
+
    Consensus();
    virtual ~Consensus();
@@ -66,7 +69,7 @@ class Consensus {
      UNDEFINED = 0,
      NEW = 1,
      ACCEPTED = 2,
-    REJECTED = 3,
+    REJECTED = 3
    };
enum class TakeoverElements : std::uint8_t {
@@ -85,13 +88,15 @@ class Consensus {
   private:
    bool use_consensus_ = false;
    bool use_remote_fencing_ = false;
+  bool prioritise_partition_size_ = false;
+  bool relaxed_node_promotion_ = false;
    uint32_t takeover_valid_time;
    uint32_t max_takeover_retry;
    const std::string kTestKeyname = "opensaf_write_test";
    const std::chrono::milliseconds kSleepInterval =
        std::chrono::milliseconds(1000);  // in ms
    static constexpr uint32_t kLockTimeout = 0;  // lock is persistent by default
-  static constexpr uint32_t kMaxRetry = 30;
+  static constexpr uint32_t kMaxRetry = 3;
void CheckForExistingTakeoverRequest();
diff --git a/src/osaf/consensus/key_value.cc b/src/osaf/consensus/key_value.cc
index 109cf9f..73a0e70 100644
--- a/src/osaf/consensus/key_value.cc
+++ b/src/osaf/consensus/key_value.cc
@@ -53,6 +53,8 @@ SaAisErrorT KeyValue::Get(const std::string& key, 
std::string& value) {
if (rc == 0) {
      return SA_AIS_OK;
+  } else if (rc == 1) {
+    return SA_AIS_ERR_INVALID_PARAM;
    } else {
      return SA_AIS_ERR_FAILED_OPERATION;
    }
@@ -109,6 +111,8 @@ SaAisErrorT KeyValue::Create(const std::string& key, const 
std::string& value,
      return SA_AIS_OK;
    } else if (rc == 1) {
      return SA_AIS_ERR_EXIST;
+  } else if (rc == 2) {
+    return SA_AIS_ERR_INVALID_PARAM;
    } else {
      return SA_AIS_ERR_FAILED_OPERATION;
    }
@@ -211,7 +215,7 @@ void WatchKeyFunction(const std::string& key, const 
ConsensusCallback& callback,
    int rc;
rc = KeyValue::Execute(command, value);
-  while (rc != 0 && retries < kMaxRetry) {
+  while (rc != 0 && rc < 126 && retries < kMaxRetry) {
      ++retries;
      std::this_thread::sleep_for(kSleepInterval);
      rc = KeyValue::Execute(command, value);
@@ -238,7 +242,7 @@ void WatchLockFunction(const ConsensusCallback& callback,
    int rc;
rc = KeyValue::Execute(command, value);
-  while (rc != 0 && retries < kMaxRetry) {
+  while (rc != 0 && rc < 126 && retries < kMaxRetry) {
      ++retries;
      std::this_thread::sleep_for(kSleepInterval);
      rc = KeyValue::Execute(command, value);


_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to