If the consensus service is enabled, only perform node failover after the peer controller has self-fenced (after 2 * FMS_TAKEOVER_REQUEST_VALID_TIME seconds).
This also means that if the node failover delay is set to a large value, we do not wait unnecessarily long before failing over assignments previously assigned to the peer controller. Remove unused fmd_conf_file variable. Change some LOG_ER calls to LOG_WA. --- src/amf/amfd/cb.h | 1 - src/amf/amfd/clm.cc | 4 ++-- src/amf/amfd/main.cc | 1 - src/amf/amfd/ndfsm.cc | 8 ++++---- src/amf/amfd/ndproc.cc | 19 +++++++++++++++++++ src/amf/amfd/node_state.cc | 23 ++++++++++++----------- src/amf/amfd/node_state_machine.cc | 19 +++++++++++++++++++ src/amf/amfd/node_state_machine.h | 2 ++ src/amf/amfd/proc.h | 1 + 9 files changed, 59 insertions(+), 19 deletions(-) diff --git a/src/amf/amfd/cb.h b/src/amf/amfd/cb.h index 89cf15d..7ac743e 100644 --- a/src/amf/amfd/cb.h +++ b/src/amf/amfd/cb.h @@ -202,7 +202,6 @@ typedef struct cl_cb_tag { AVD_TMR heartbeat_tmr; /* The timer for sending heart beats to nd. */ SaTimeT heartbeat_tmr_period; uint32_t minimum_cluster_size; - std::string fmd_conf_file; uint32_t nodes_exit_cnt; /* The counter to identifies the number of nodes that have exited the membership diff --git a/src/amf/amfd/clm.cc b/src/amf/amfd/clm.cc index aeae939..cfbe36a 100644 --- a/src/amf/amfd/clm.cc +++ b/src/amf/amfd/clm.cc @@ -203,7 +203,7 @@ static void clm_node_exit_complete(SaClmNodeIdT nodeId) { } if (avd_cb->failover_list.count(node->node_info.nodeId) == 0 && - avd_cb->node_failover_delay == 0) { + delay_failover(avd_cb, node->node_info.nodeId) == false) { avd_node_failover(node); avd_node_delete_nodeid(node); } @@ -322,7 +322,7 @@ static void clm_track_cb( LOG_IN("%s: CLM node '%s' is not an AMF cluster member; MDS down received", __FUNCTION__, node_name.c_str()); if (avd_cb->failover_list.count(node->node_info.nodeId) == 0 && - avd_cb->node_failover_delay == 0) { + delay_failover(avd_cb, node->node_info.nodeId) == false) { avd_node_delete_nodeid(node); } goto done; diff --git a/src/amf/amfd/main.cc b/src/amf/amfd/main.cc index e3d0957..03857a1 100644 --- 
a/src/amf/amfd/main.cc +++ b/src/amf/amfd/main.cc @@ -582,7 +582,6 @@ static uint32_t initialize(void) { } cb->minimum_cluster_size = base::GetEnv("OSAF_AMF_MIN_CLUSTER_SIZE", uint32_t{2}); - cb->fmd_conf_file = base::GetEnv("FMS_CONF_FILE", ""); node_list_db = new AmfDb<uint32_t, AVD_FAIL_OVER_NODE>; amfnd_svc_db = new std::set<uint32_t>; diff --git a/src/amf/amfd/ndfsm.cc b/src/amf/amfd/ndfsm.cc index 7099196..16b2def 100644 --- a/src/amf/amfd/ndfsm.cc +++ b/src/amf/amfd/ndfsm.cc @@ -811,7 +811,7 @@ void avd_mds_avnd_down_evh(AVD_CL_CB *cb, AVD_EVT *evt) { std::shared_ptr<NodeStateMachine> failed_node = cb->failover_list.at(evt->info.node_id); failed_node->MdsDown(); - } else if (cb->node_failover_delay > 0) { + } else if (delay_failover(cb, evt->info.node_id) == true) { LOG_NO("Node '%s' is down. Start failover delay timer", node->node_name.c_str()); @@ -821,10 +821,10 @@ void avd_mds_avnd_down_evh(AVD_CL_CB *cb, AVD_EVT *evt) { } if (avd_cb->avail_state_avd == SA_AMF_HA_ACTIVE) { - if (cb->node_failover_delay == 0) { + check_quorum(cb); + if (delay_failover(cb, evt->info.node_id) == false) { avd_node_failover(node); } - check_quorum(cb); node->node_info.member = SA_FALSE; // Update standby out of sync if standby sc goes down if (avd_cb->node_id_avd_other == node->node_info.nodeId) { @@ -833,7 +833,7 @@ void avd_mds_avnd_down_evh(AVD_CL_CB *cb, AVD_EVT *evt) { m_AVSV_SEND_CKPT_UPDT_ASYNC_UPDT(avd_cb, node, AVSV_CKPT_AVD_NODE_CONFIG); } - } else if (cb->node_failover_delay == 0) { + } else if (delay_failover(cb, evt->info.node_id) == false) { /* Remove dynamic info for node but keep in nodeid tree. * Possibly used at the end of controller failover to * to failover payload nodes. 
diff --git a/src/amf/amfd/ndproc.cc b/src/amf/amfd/ndproc.cc index 5f5cbcd..0d30dfe 100644 --- a/src/amf/amfd/ndproc.cc +++ b/src/amf/amfd/ndproc.cc @@ -1277,6 +1277,25 @@ void avd_node_failover(AVD_AVND *node, const bool mw_only) { TRACE_LEAVE(); } +bool delay_failover(const AVD_CL_CB *cb, const SaClmNodeIdT node_id) { + TRACE_ENTER(); + Consensus consensus_service; + bool delay = false; + + if (cb->node_failover_delay > 0) { + delay = true; + } else if (node_id == cb->node_id_avd_other && + consensus_service.IsEnabled() == true && + consensus_service.IsRemoteFencingEnabled() == false) { + // even though node failover delay is set to 0, + // the peer SC will still take some time to self-fence, + // we should wait FMS_TAKEOVER_REQUEST_VALID_TIME + delay = true; + } + + return delay; +} + void check_quorum(AVD_CL_CB *cb) { TRACE_ENTER(); diff --git a/src/amf/amfd/node_state.cc b/src/amf/amfd/node_state.cc index 787ddab..4446981 100644 --- a/src/amf/amfd/node_state.cc +++ b/src/amf/amfd/node_state.cc @@ -17,7 +17,7 @@ Start::Start(NodeStateMachine *fsm) : } void Start::TimerExpired() { - LOG_ER("unexpected timer event"); + LOG_WA("unexpected timer event"); } void Start::MdsUp() { @@ -62,8 +62,9 @@ Lost::Lost(NodeStateMachine *fsm) : NodeState(fsm) { avd_stop_tmr(fsm_->cb_, fsm_->timer_.get()); LOG_NO("Start timer for '%x'", fsm_->node_id_); + avd_start_tmr(fsm_->cb_, fsm_->timer_.get(), - fsm_->cb_->node_failover_delay * SA_TIME_ONE_SECOND); + fsm_->FailoverDelay()); } void Lost::TimerExpired() { @@ -85,7 +86,7 @@ void Lost::TimerExpired() { // wait for checkpoint to transition state // meanwhile, restart timer in case a SC failover to this node occurs avd_start_tmr(fsm_->cb_, fsm_->timer_.get(), - fsm_->cb_->node_failover_delay * SA_TIME_ONE_SECOND); + fsm_->FailoverDelay()); } } @@ -98,12 +99,12 @@ void Lost::MdsUp() { void Lost::MdsDown() { if (fsm_->Active() == true) { - LOG_ER("unexpected MDS down event"); + LOG_WA("unexpected MDS down event"); } } void 
Lost::NodeUp() { - LOG_ER("unexpected node up event"); + LOG_WA("unexpected node up event"); } // state 'LostFound' @@ -149,7 +150,7 @@ void LostFound::TimerExpired() { void LostFound::MdsUp() { if (fsm_->Active() == true) { - LOG_ER("unexpected MDS up event"); + LOG_WA("unexpected MDS up event"); } } @@ -172,7 +173,7 @@ void LostFound::NodeUp() { } else { // wait for checkpoint to transition state // we are standby and shouldn't get node up - LOG_ER("unexpected node up event"); + LOG_WA("unexpected node up event"); } } @@ -209,7 +210,7 @@ void LostRebooting::TimerExpired() { void LostRebooting::MdsUp() { if (fsm_->Active() == true) { - LOG_ER("unexpected MDS up event"); + LOG_WA("unexpected MDS up event"); } } @@ -234,7 +235,7 @@ void LostRebooting::MdsDown() { } void LostRebooting::NodeUp() { - LOG_ER("unexpected node up event"); + LOG_WA("unexpected node up event"); } // state 'Failed' @@ -245,7 +246,7 @@ Failed::Failed(NodeStateMachine *fsm) : } void Failed::TimerExpired() { - LOG_ER("unexpected timer event"); + LOG_WA("unexpected timer event"); } void Failed::MdsUp() { @@ -305,7 +306,7 @@ void FailedFound::TimerExpired() { void FailedFound::MdsUp() { if (fsm_->Active() == true) { - LOG_ER("unexpected MDS up event"); + LOG_WA("unexpected MDS up event"); } } diff --git a/src/amf/amfd/node_state_machine.cc b/src/amf/amfd/node_state_machine.cc index c5d86d3..4653f79 100644 --- a/src/amf/amfd/node_state_machine.cc +++ b/src/amf/amfd/node_state_machine.cc @@ -1,4 +1,5 @@ #include "base/logtrace.h" +#include "osaf/consensus/consensus.h" #include "amf/amfd/amfd.h" #include "amf/amfd/node_state_machine.h" @@ -93,6 +94,24 @@ uint32_t NodeStateMachine::GetState() { return state_->GetInt(); } +SaTimeT NodeStateMachine::FailoverDelay() const { + TRACE_ENTER(); + + SaTimeT delay; + if (node_id_ == cb_->node_id_avd_other) { + // If peer SC, it's guaranteed to fence after this amount of time + // (2 * FMS_TAKEOVER_REQUEST_VALID_TIME). 
+ // This may be smaller than node_failover_delay. + Consensus consensus_service; + delay = 2 * consensus_service.TakeoverValidTime(); + } else { + delay = cb_->node_failover_delay; + } + + TRACE("delay is %llu", delay); + return delay * SA_TIME_ONE_SECOND; +} + bool NodeStateMachine::Active() { return cb_->avail_state_avd == SA_AMF_HA_ACTIVE; } diff --git a/src/amf/amfd/node_state_machine.h b/src/amf/amfd/node_state_machine.h index 3bfabd0..598642e 100644 --- a/src/amf/amfd/node_state_machine.h +++ b/src/amf/amfd/node_state_machine.h @@ -22,6 +22,8 @@ class NodeStateMachine { void SetState(uint32_t state); uint32_t GetState(); + SaTimeT FailoverDelay() const; + std::shared_ptr<AVD_TMR> timer_; std::shared_ptr<NodeState> state_; diff --git a/src/amf/amfd/proc.h b/src/amf/amfd/proc.h index f1dc7ba..4052aec 100644 --- a/src/amf/amfd/proc.h +++ b/src/amf/amfd/proc.h @@ -96,6 +96,7 @@ void avd_process_hb_event(AVD_CL_CB *cb_now, struct AVD_EVT *evt); extern void avd_node_mark_absent(AVD_AVND *node); extern void avd_tmr_snd_hb_evh(AVD_CL_CB *cb, AVD_EVT *evt); extern void avd_node_failover(AVD_AVND *node, const bool mw_only = false); +bool delay_failover(const AVD_CL_CB *cb, const SaClmNodeIdT node_id); extern void check_quorum(AVD_CL_CB *cb); extern AVD_SU *get_other_su_from_oper_list(AVD_SU *su); extern void su_complete_admin_op(AVD_SU *su, SaAisErrorT result); -- 2.7.4 _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel