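When FMS_TAKEOVER_PRIORITISE_PARTITION_SIZE is enabled, make the time that
we wait for MDS node events configurable through the new
FMS_TAKEOVER_PRIORITISE_PARTITION_SIZE_MDS_WAIT_TIME setting (default: 4
seconds). This replaces the hard-coded waits of 2 seconds in fm_rda.cc and
4 seconds in role.cc.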
---
 src/fm/fmd/fm_rda.cc            | 4 +++-
 src/fm/fmd/fmd.conf             | 5 +++++
 src/osaf/consensus/consensus.cc | 9 +++++++++
 src/osaf/consensus/consensus.h  | 2 ++
 src/rde/rded/role.cc            | 4 +++-
 5 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/src/fm/fmd/fm_rda.cc b/src/fm/fmd/fm_rda.cc
index c072cb0..fca417f 100644
--- a/src/fm/fmd/fm_rda.cc
+++ b/src/fm/fmd/fm_rda.cc
@@ -75,7 +75,9 @@ void promote_node(FM_CB *fm_cb) {
     // be processing MDS down events and updating cluster_size concurrently.
     // We need cluster_size to be as accurate as possible, without waiting
     // too long for node down events.
-    std::this_thread::sleep_for(std::chrono::seconds(2));
+    std::this_thread::sleep_for(
+      std::chrono::seconds(
+        consensus_service.PrioritisePartitionSizeWaitTime()));
   }
 
   uint32_t rc;
diff --git a/src/fm/fmd/fmd.conf b/src/fm/fmd/fmd.conf
index 209e484..4dbf53a 100644
--- a/src/fm/fmd/fmd.conf
+++ b/src/fm/fmd/fmd.conf
@@ -36,6 +36,11 @@ export FMS_TAKEOVER_REQUEST_VALID_TIME=20
 # Default is 1
 #export FMS_TAKEOVER_PRIORITISE_PARTITION_SIZE=1
 
+# If FMS_TAKEOVER_PRIORITISE_PARTITION_SIZE is set to 1, wait for
+# this number of seconds for MDS events before making a decision
+# on partition size. Default is 4 seconds.
+#export FMS_TAKEOVER_PRIORITISE_PARTITION_SIZE_MDS_WAIT_TIME=4
+
 # Default behaviour is not to allow promotion of this node to Active
 # unless a lock can be obtained, if split brain prevention is enabled.
 # Uncomment the next line to allow promotion of this node at cluster startup,
diff --git a/src/osaf/consensus/consensus.cc b/src/osaf/consensus/consensus.cc
index 814885e..0e37fa3 100644
--- a/src/osaf/consensus/consensus.cc
+++ b/src/osaf/consensus/consensus.cc
@@ -207,6 +207,10 @@ bool Consensus::PrioritisePartitionSize() const {
   return prioritise_partition_size_;
 }
 
+uint32_t Consensus::PrioritisePartitionSizeWaitTime() const {
+  return prioritise_partition_size_mds_wait_time_;
+}
+
 uint32_t Consensus::TakeoverValidTime() const {
   return takeover_valid_time_;
 }
@@ -253,6 +257,8 @@ void Consensus::ProcessEnvironmentSettings() {
   uint32_t use_remote_fencing = base::GetEnv("FMS_USE_REMOTE_FENCING", 0);
   uint32_t prioritise_partition_size =
     base::GetEnv("FMS_TAKEOVER_PRIORITISE_PARTITION_SIZE", 1);
+  uint32_t prioritise_partition_size_mds_wait_time =
+    base::GetEnv("FMS_TAKEOVER_PRIORITISE_PARTITION_SIZE_MDS_WAIT_TIME", 4);
   uint32_t relaxed_node_promotion =
     base::GetEnv("FMS_RELAXED_NODE_PROMOTION", 0);
   config_file_ = base::GetEnv("FMS_CONF_FILE", "");
@@ -281,6 +287,9 @@ void Consensus::ProcessEnvironmentSettings() {
   if (use_consensus_ == true && relaxed_node_promotion == 1) {
     relaxed_node_promotion_ = true;
   }
+
+  prioritise_partition_size_mds_wait_time_ =
+    prioritise_partition_size_mds_wait_time;
 }
 
 bool Consensus::ReloadConfiguration() {
diff --git a/src/osaf/consensus/consensus.h b/src/osaf/consensus/consensus.h
index 1fabf90..1aba561 100644
--- a/src/osaf/consensus/consensus.h
+++ b/src/osaf/consensus/consensus.h
@@ -61,6 +61,7 @@ class Consensus {
   bool IsRelaxedNodePromotionEnabled() const;
 
   bool PrioritisePartitionSize() const;
+  uint32_t PrioritisePartitionSizeWaitTime() const;
 
   uint32_t TakeoverValidTime() const;
 
@@ -100,6 +101,7 @@ class Consensus {
   bool use_consensus_{false};
   bool use_remote_fencing_{false};
   bool prioritise_partition_size_{true};
+  uint32_t prioritise_partition_size_mds_wait_time_{4};
   bool relaxed_node_promotion_{false};
   uint32_t takeover_valid_time_{20};
   uint32_t max_takeover_retry_{0};
diff --git a/src/rde/rded/role.cc b/src/rde/rded/role.cc
index b8c8157..b890117 100644
--- a/src/rde/rded/role.cc
+++ b/src/rde/rded/role.cc
@@ -83,7 +83,9 @@ void Role::MonitorCallback(const std::string& key, const std::string& new_value,
         consensus_service.PrioritisePartitionSize() == true) {
       // don't send this to the main thread straight away, as it will
       // need some time to process topology changes.
-      std::this_thread::sleep_for(std::chrono::seconds(4));
+      std::this_thread::sleep_for(
+        std::chrono::seconds(
+          consensus_service.PrioritisePartitionSizeWaitTime()));
     }
   } else {
     msg->type = RDE_MSG_NEW_ACTIVE_CALLBACK;
-- 
2.7.4



_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to