If consensus service is enabled, only perform node failover
after peer controller has self-fenced
(after 2 * FMS_TAKEOVER_REQUEST_VALID_TIME seconds).

This also means that if node failover delay is set to a large value,
we do not wait unnecessarily long before failing over assignments
previously assigned to the peer controller.

Remove unused fmd_conf_file variable.

Change some LOG_ER calls to LOG_WA.
---
 src/amf/amfd/cb.h                  |  1 -
 src/amf/amfd/clm.cc                |  4 ++--
 src/amf/amfd/main.cc               |  1 -
 src/amf/amfd/ndfsm.cc              |  8 ++++----
 src/amf/amfd/ndproc.cc             | 19 +++++++++++++++++++
 src/amf/amfd/node_state.cc         | 23 ++++++++++++-----------
 src/amf/amfd/node_state_machine.cc | 19 +++++++++++++++++++
 src/amf/amfd/node_state_machine.h  |  2 ++
 src/amf/amfd/proc.h                |  1 +
 9 files changed, 59 insertions(+), 19 deletions(-)

diff --git a/src/amf/amfd/cb.h b/src/amf/amfd/cb.h
index 89cf15d..7ac743e 100644
--- a/src/amf/amfd/cb.h
+++ b/src/amf/amfd/cb.h
@@ -202,7 +202,6 @@ typedef struct cl_cb_tag {
   AVD_TMR heartbeat_tmr; /* The timer for sending heart beats to nd. */
   SaTimeT heartbeat_tmr_period;
   uint32_t minimum_cluster_size;
-  std::string fmd_conf_file;
 
   uint32_t nodes_exit_cnt; /* The counter to identifies the number
                       of nodes that have exited the membership
diff --git a/src/amf/amfd/clm.cc b/src/amf/amfd/clm.cc
index aeae939..cfbe36a 100644
--- a/src/amf/amfd/clm.cc
+++ b/src/amf/amfd/clm.cc
@@ -203,7 +203,7 @@ static void clm_node_exit_complete(SaClmNodeIdT nodeId) {
   }
 
   if (avd_cb->failover_list.count(node->node_info.nodeId) == 0 &&
-    avd_cb->node_failover_delay == 0) {
+    delay_failover(avd_cb, node->node_info.nodeId) == false) {
     avd_node_failover(node);
     avd_node_delete_nodeid(node);
   }
@@ -322,7 +322,7 @@ static void clm_track_cb(
             LOG_IN("%s: CLM node '%s' is not an AMF cluster member; MDS down received",
                    __FUNCTION__, node_name.c_str());
             if (avd_cb->failover_list.count(node->node_info.nodeId) == 0 &&
-              avd_cb->node_failover_delay == 0) {
+              delay_failover(avd_cb, node->node_info.nodeId) == false) {
               avd_node_delete_nodeid(node);
             }
             goto done;
diff --git a/src/amf/amfd/main.cc b/src/amf/amfd/main.cc
index e3d0957..03857a1 100644
--- a/src/amf/amfd/main.cc
+++ b/src/amf/amfd/main.cc
@@ -582,7 +582,6 @@ static uint32_t initialize(void) {
   }
   cb->minimum_cluster_size =
       base::GetEnv("OSAF_AMF_MIN_CLUSTER_SIZE", uint32_t{2});
-  cb->fmd_conf_file = base::GetEnv("FMS_CONF_FILE", "");
 
   node_list_db = new AmfDb<uint32_t, AVD_FAIL_OVER_NODE>;
   amfnd_svc_db = new std::set<uint32_t>;
diff --git a/src/amf/amfd/ndfsm.cc b/src/amf/amfd/ndfsm.cc
index 7099196..16b2def 100644
--- a/src/amf/amfd/ndfsm.cc
+++ b/src/amf/amfd/ndfsm.cc
@@ -811,7 +811,7 @@ void avd_mds_avnd_down_evh(AVD_CL_CB *cb, AVD_EVT *evt) {
       std::shared_ptr<NodeStateMachine> failed_node =
         cb->failover_list.at(evt->info.node_id);
       failed_node->MdsDown();
-    } else if (cb->node_failover_delay > 0) {
+    } else if (delay_failover(cb, evt->info.node_id) == true) {
       LOG_NO("Node '%s' is down. Start failover delay timer",
               node->node_name.c_str());
 
@@ -821,10 +821,10 @@ void avd_mds_avnd_down_evh(AVD_CL_CB *cb, AVD_EVT *evt) {
     }
 
     if (avd_cb->avail_state_avd == SA_AMF_HA_ACTIVE) {
-      if (cb->node_failover_delay == 0) {
+      check_quorum(cb);
+      if (delay_failover(cb, evt->info.node_id) == false) {
         avd_node_failover(node);
       }
-      check_quorum(cb);
       node->node_info.member = SA_FALSE;
       // Update standby out of sync if standby sc goes down
       if (avd_cb->node_id_avd_other == node->node_info.nodeId) {
@@ -833,7 +833,7 @@ void avd_mds_avnd_down_evh(AVD_CL_CB *cb, AVD_EVT *evt) {
         m_AVSV_SEND_CKPT_UPDT_ASYNC_UPDT(avd_cb, node,
                                          AVSV_CKPT_AVD_NODE_CONFIG);
       }
-    } else if (cb->node_failover_delay == 0) {
+    } else if (delay_failover(cb, evt->info.node_id) == false) {
       /* Remove dynamic info for node but keep in nodeid tree.
        * Possibly used at the end of controller failover to
        * to failover payload nodes.
diff --git a/src/amf/amfd/ndproc.cc b/src/amf/amfd/ndproc.cc
index 5f5cbcd..0d30dfe 100644
--- a/src/amf/amfd/ndproc.cc
+++ b/src/amf/amfd/ndproc.cc
@@ -1277,6 +1277,25 @@ void avd_node_failover(AVD_AVND *node, const bool mw_only) {
   TRACE_LEAVE();
 }
 
+// Returns true when failover of node_id must be deferred instead of run now.
+bool delay_failover(const AVD_CL_CB *cb, const SaClmNodeIdT node_id) {
+  TRACE_ENTER();
+  Consensus consensus_service;
+  bool delay = false;
+  if (cb->node_failover_delay > 0) {
+    delay = true;
+  } else if (node_id == cb->node_id_avd_other &&
+             consensus_service.IsEnabled() == true &&
+             consensus_service.IsRemoteFencingEnabled() == false) {
+    // even though node failover delay is set to 0,
+    // the peer SC will still take some time to self-fence,
+    // we should wait 2 * FMS_TAKEOVER_REQUEST_VALID_TIME
+    delay = true;
+  }
+  TRACE_LEAVE();
+  return delay;
+}
+
 void check_quorum(AVD_CL_CB *cb) {
   TRACE_ENTER();
 
diff --git a/src/amf/amfd/node_state.cc b/src/amf/amfd/node_state.cc
index 787ddab..4446981 100644
--- a/src/amf/amfd/node_state.cc
+++ b/src/amf/amfd/node_state.cc
@@ -17,7 +17,7 @@ Start::Start(NodeStateMachine *fsm) :
 }
 
 void Start::TimerExpired() {
-  LOG_ER("unexpected timer event");
+  LOG_WA("unexpected timer event");
 }
 
 void Start::MdsUp() {
@@ -62,8 +62,9 @@ Lost::Lost(NodeStateMachine *fsm) :
   NodeState(fsm) {
   avd_stop_tmr(fsm_->cb_, fsm_->timer_.get());
   LOG_NO("Start timer for '%x'", fsm_->node_id_);
+
   avd_start_tmr(fsm_->cb_, fsm_->timer_.get(),
-                fsm_->cb_->node_failover_delay * SA_TIME_ONE_SECOND);
+                fsm_->FailoverDelay());
 }
 
 void Lost::TimerExpired() {
@@ -85,7 +86,7 @@ void Lost::TimerExpired() {
     // wait for checkpoint to transition state
     // meanwhile, restart timer in case a SC failover to this node occurs
     avd_start_tmr(fsm_->cb_, fsm_->timer_.get(),
-                  fsm_->cb_->node_failover_delay * SA_TIME_ONE_SECOND);
+                  fsm_->FailoverDelay());
   }
 }
 
@@ -98,12 +99,12 @@ void Lost::MdsUp() {
 
 void Lost::MdsDown() {
   if (fsm_->Active() == true) {
-    LOG_ER("unexpected MDS down event");
+    LOG_WA("unexpected MDS down event");
   }
 }
 
 void Lost::NodeUp() {
-  LOG_ER("unexpected node up event");
+  LOG_WA("unexpected node up event");
 }
 
 // state 'LostFound'
@@ -149,7 +150,7 @@ void LostFound::TimerExpired() {
 
 void LostFound::MdsUp() {
   if (fsm_->Active() == true) {
-    LOG_ER("unexpected MDS up event");
+    LOG_WA("unexpected MDS up event");
   }
 }
 
@@ -172,7 +173,7 @@ void LostFound::NodeUp() {
   } else {
     // wait for checkpoint to transition state
     // we are standby and shouldn't get node up
-    LOG_ER("unexpected node up event");
+    LOG_WA("unexpected node up event");
   }
 }
 
@@ -209,7 +210,7 @@ void LostRebooting::TimerExpired() {
 
 void LostRebooting::MdsUp() {
   if (fsm_->Active() == true) {
-    LOG_ER("unexpected MDS up event");
+    LOG_WA("unexpected MDS up event");
   }
 }
 
@@ -234,7 +235,7 @@ void LostRebooting::MdsDown() {
 }
 
 void LostRebooting::NodeUp() {
-  LOG_ER("unexpected node up event");
+  LOG_WA("unexpected node up event");
 }
 
 // state 'Failed'
@@ -245,7 +246,7 @@ Failed::Failed(NodeStateMachine *fsm) :
 }
 
 void Failed::TimerExpired() {
-  LOG_ER("unexpected timer event");
+  LOG_WA("unexpected timer event");
 }
 
 void Failed::MdsUp() {
@@ -305,7 +306,7 @@ void FailedFound::TimerExpired() {
 
 void FailedFound::MdsUp() {
   if (fsm_->Active() == true) {
-    LOG_ER("unexpected MDS up event");
+    LOG_WA("unexpected MDS up event");
   }
 }
 
diff --git a/src/amf/amfd/node_state_machine.cc b/src/amf/amfd/node_state_machine.cc
index c5d86d3..4653f79 100644
--- a/src/amf/amfd/node_state_machine.cc
+++ b/src/amf/amfd/node_state_machine.cc
@@ -1,4 +1,5 @@
 #include "base/logtrace.h"
+#include "osaf/consensus/consensus.h"
 #include "amf/amfd/amfd.h"
 #include "amf/amfd/node_state_machine.h"
 
@@ -93,6 +94,24 @@ uint32_t NodeStateMachine::GetState() {
   return state_->GetInt();
 }
 
+// Returns this node's failover delay, multiplied by SA_TIME_ONE_SECOND.
+SaTimeT NodeStateMachine::FailoverDelay() const {
+  TRACE_ENTER();
+  SaTimeT delay = cb_->node_failover_delay;
+  if (node_id_ == cb_->node_id_avd_other) {
+    Consensus consensus_service;
+    if (consensus_service.IsEnabled() == true) {
+      // If peer SC, it's guaranteed to fence after this amount of time
+      // (2 * FMS_TAKEOVER_REQUEST_VALID_TIME), but only with consensus on.
+      // This may be smaller than node_failover_delay.
+      delay = 2 * consensus_service.TakeoverValidTime();
+    }
+  }
+  TRACE("delay is %llu", delay);
+  TRACE_LEAVE();
+  return delay * SA_TIME_ONE_SECOND;
+}
+
 bool NodeStateMachine::Active() {
   return cb_->avail_state_avd == SA_AMF_HA_ACTIVE;
 }
diff --git a/src/amf/amfd/node_state_machine.h b/src/amf/amfd/node_state_machine.h
index 3bfabd0..598642e 100644
--- a/src/amf/amfd/node_state_machine.h
+++ b/src/amf/amfd/node_state_machine.h
@@ -22,6 +22,8 @@ class NodeStateMachine {
   void SetState(uint32_t state);
   uint32_t GetState();
 
+  SaTimeT FailoverDelay() const;
+
   std::shared_ptr<AVD_TMR> timer_;
   std::shared_ptr<NodeState> state_;
 
diff --git a/src/amf/amfd/proc.h b/src/amf/amfd/proc.h
index f1dc7ba..4052aec 100644
--- a/src/amf/amfd/proc.h
+++ b/src/amf/amfd/proc.h
@@ -96,6 +96,7 @@ void avd_process_hb_event(AVD_CL_CB *cb_now, struct AVD_EVT *evt);
 extern void avd_node_mark_absent(AVD_AVND *node);
 extern void avd_tmr_snd_hb_evh(AVD_CL_CB *cb, AVD_EVT *evt);
 extern void avd_node_failover(AVD_AVND *node, const bool mw_only = false);
+bool delay_failover(const AVD_CL_CB *cb, const SaClmNodeIdT node_id);
 extern void check_quorum(AVD_CL_CB *cb);
 extern AVD_SU *get_other_su_from_oper_list(AVD_SU *su);
 extern void su_complete_admin_op(AVD_SU *su, SaAisErrorT result);
-- 
2.7.4



_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to