A check to make sure the consensus service is writable (i.e. the SC is in a partition with quorum) is present in avd_node_failover(). However, because of [#2918], this function is not always called. Move the check into a separate function, check_quorum(), and call it from avd_mds_avnd_down_evh() so that it runs regardless of whether avd_node_failover() is invoked there. --- src/amf/amfd/ndfsm.cc | 1 + src/amf/amfd/ndproc.cc | 10 +++++++--- src/amf/amfd/proc.h | 1 + 3 files changed, 9 insertions(+), 3 deletions(-)
diff --git a/src/amf/amfd/ndfsm.cc b/src/amf/amfd/ndfsm.cc index 301de835b..218551cdd 100644 --- a/src/amf/amfd/ndfsm.cc +++ b/src/amf/amfd/ndfsm.cc @@ -817,6 +817,7 @@ void avd_mds_avnd_down_evh(AVD_CL_CB *cb, AVD_EVT *evt) { if (cb->node_failover_delay == 0) { avd_node_failover(node); } + check_quorum(); node->node_info.member = SA_FALSE; // Update standby out of sync if standby sc goes down if (avd_cb->node_id_avd_other == node->node_info.nodeId) { diff --git a/src/amf/amfd/ndproc.cc b/src/amf/amfd/ndproc.cc index 853a68b6e..c4eebb174 100644 --- a/src/amf/amfd/ndproc.cc +++ b/src/amf/amfd/ndproc.cc @@ -1242,6 +1242,12 @@ void avd_node_failover(AVD_AVND *node, const bool mw_only) { avd_node_down_appl_susi_failover(avd_cb, node); } + TRACE_LEAVE(); +} + +void check_quorum() { + TRACE_ENTER(); + Consensus consensus_service; if (consensus_service.IsRemoteFencingEnabled() == false && consensus_service.IsWritable() == false) { @@ -1250,6 +1256,4 @@ void avd_node_failover(AVD_AVND *node, const bool mw_only) { opensaf_reboot(0, nullptr, "Quorum lost. Rebooting this node to prevent split-brain"); } - - TRACE_LEAVE(); -} +} \ No newline at end of file diff --git a/src/amf/amfd/proc.h b/src/amf/amfd/proc.h index 99d1cbfc2..a37821829 100644 --- a/src/amf/amfd/proc.h +++ b/src/amf/amfd/proc.h @@ -96,6 +96,7 @@ void avd_process_hb_event(AVD_CL_CB *cb_now, struct AVD_EVT *evt); extern void avd_node_mark_absent(AVD_AVND *node); extern void avd_tmr_snd_hb_evh(AVD_CL_CB *cb, AVD_EVT *evt); extern void avd_node_failover(AVD_AVND *node, const bool mw_only = false); +extern void check_quorum(); extern AVD_SU *get_other_su_from_oper_list(AVD_SU *su); extern void su_complete_admin_op(AVD_SU *su, SaAisErrorT result); extern void comp_complete_admin_op(AVD_COMP *comp, SaAisErrorT result); -- 2.17.1 _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel