Problem: As described in previous patch: "amfd: Make creation
and deletion of assignment object as IMM synced call [#2416]"
The previous patch makes the creation and deletion of assignment objects
an IMM synced call at the first try, which prevents inconsistency
between AMFND and IMM. However, if that first IMM call fails, the
same problem can still happen.
This patch continues the approach used for absent assignments, which
uses the node_fail() Sg Fsm to perform failover. node_fail() is now
also used as a cleanup method that iterates over and removes all
invalid absent assignments, as well as failing over the valid ones.
---
src/amf/amfd/cluster.cc | 7 ++++---
src/amf/amfd/sg.cc | 23 +++++++++++++++++------
src/amf/amfd/si.cc | 20 ++++++++++++++++----
src/amf/amfd/su.cc | 42 +++++++++++++++++++++++++++++++++++++-----
src/amf/amfd/su.h | 1 +
5 files changed, 75 insertions(+), 18 deletions(-)
diff --git a/src/amf/amfd/cluster.cc b/src/amf/amfd/cluster.cc
index ef1a0328a..9ec6746a8 100644
--- a/src/amf/amfd/cluster.cc
+++ b/src/amf/amfd/cluster.cc
@@ -97,11 +97,12 @@ void avd_cluster_tmr_init_evh(AVD_CL_CB *cb, AVD_EVT *evt) {
continue;
}
- if (i_sg->any_assignment_absent()) {
+ while (i_sg->any_assignment_absent()) {
// failover with ABSENT SUSI, which had already been removed during
- // headless
+ // headless, until all ABSENT SUSI(s) are failovered successfully
i_sg->failover_absent_assignment();
- } else if (i_sg->any_assignment_in_progress() == false) {
+ }
+ if (i_sg->any_assignment_in_progress() == false) {
i_sg->set_fsm_state(AVD_SG_FSM_STABLE);
}
diff --git a/src/amf/amfd/sg.cc b/src/amf/amfd/sg.cc
index 9b04a423c..74916009c 100644
--- a/src/amf/amfd/sg.cc
+++ b/src/amf/amfd/sg.cc
@@ -2288,16 +2288,27 @@ bool avd_sg_validate_headless_cached_rta(AVD_CL_CB *cb)
{
void AVD_SG::failover_absent_assignment() {
TRACE_ENTER2("SG:'%s'", name.c_str());
+ AVD_SU* failed_su = nullptr;
for (const auto &su : list_of_su) {
if (su->any_susi_fsm_in(AVD_SU_SI_STATE_ABSENT)) {
- node_fail(avd_cb, su);
- if (su->is_in_service())
- su->set_readiness_state(SA_AMF_READINESS_IN_SERVICE);
- if (su->sg_of_su->sg_fsm_state == AVD_SG_FSM_STABLE)
- su->sg_of_su->realign(avd_cb, this);
- break;
+ // look up SU has the most absent STANDBY assignment to failover first
+ // TODO: need to verify with NpM and Nway Sg
+ if (failed_su == nullptr) failed_su = su;
+ else if (su->count_susi_with(SA_AMF_HA_STANDBY, AVD_SU_SI_STATE_ABSENT) >
+ failed_su->count_susi_with(SA_AMF_HA_STANDBY,
+ AVD_SU_SI_STATE_ABSENT)) {
+ failed_su = su;
+ }
}
}
+
+ if (failed_su != nullptr) {
+ node_fail(avd_cb, failed_su);
+ if (failed_su->is_in_service())
+ failed_su->set_readiness_state(SA_AMF_READINESS_IN_SERVICE);
+ }
+ if (sg_fsm_state == AVD_SG_FSM_STABLE)
+ realign(avd_cb, this);
TRACE_LEAVE();
}
diff --git a/src/amf/amfd/si.cc b/src/amf/amfd/si.cc
index 298188a84..6b38a6a11 100644
--- a/src/amf/amfd/si.cc
+++ b/src/amf/amfd/si.cc
@@ -1402,7 +1402,10 @@ void AVD_SI::inc_curr_act_ass() {
}
void AVD_SI::dec_curr_act_ass() {
- osafassert(saAmfSINumCurrActiveAssignments > 0);
+ if (saAmfSINumCurrActiveAssignments == 0) {
+ LOG_WA("Failed to decrease saAmfSINumCurrActiveAssignments");
+ return;
+ }
saAmfSINumCurrActiveAssignments--;
TRACE("%s saAmfSINumCurrActiveAssignments=%u", name.c_str(),
saAmfSINumCurrActiveAssignments);
@@ -1419,7 +1422,10 @@ void AVD_SI::inc_curr_stdby_ass() {
}
void AVD_SI::dec_curr_stdby_ass() {
- osafassert(saAmfSINumCurrStandbyAssignments > 0);
+ if (saAmfSINumCurrStandbyAssignments == 0) {
+ LOG_WA("Failed to decrease saAmfSINumCurrStandbyAssignments");
+ return;
+ }
saAmfSINumCurrStandbyAssignments--;
TRACE("%s saAmfSINumCurrStandbyAssignments=%u", name.c_str(),
saAmfSINumCurrStandbyAssignments);
@@ -1433,7 +1439,10 @@ void AVD_SI::inc_curr_act_dec_std_ass() {
TRACE("%s saAmfSINumCurrActiveAssignments=%u", name.c_str(),
saAmfSINumCurrActiveAssignments);
- osafassert(saAmfSINumCurrStandbyAssignments > 0);
+ if (saAmfSINumCurrStandbyAssignments == 0) {
+ LOG_WA("Failed to decrease saAmfSINumCurrStandbyAssignments");
+ return;
+ }
saAmfSINumCurrStandbyAssignments--;
TRACE("%s saAmfSINumCurrStandbyAssignments=%u", name.c_str(),
saAmfSINumCurrStandbyAssignments);
@@ -1448,7 +1457,10 @@ void AVD_SI::inc_curr_stdby_dec_act_ass() {
saAmfSINumCurrStandbyAssignments);
m_AVSV_SEND_CKPT_UPDT_ASYNC_UPDT(avd_cb, this, AVSV_CKPT_SI_SU_CURR_STBY);
- osafassert(saAmfSINumCurrActiveAssignments > 0);
+ if (saAmfSINumCurrActiveAssignments == 0) {
+ LOG_WA("Failed to decrease saAmfSINumCurrActiveAssignments");
+ return;
+ }
saAmfSINumCurrActiveAssignments--;
TRACE("%s saAmfSINumCurrActiveAssignments=%u", name.c_str(),
saAmfSINumCurrActiveAssignments);
diff --git a/src/amf/amfd/su.cc b/src/amf/amfd/su.cc
index fac1188b5..2a1923c04 100644
--- a/src/amf/amfd/su.cc
+++ b/src/amf/amfd/su.cc
@@ -2127,29 +2127,45 @@ static void su_ccb_apply_cb(CcbUtilOperationData_t
*opdata) {
}
void AVD_SU::inc_curr_act_si() {
+ if (saAmfSUNumCurrActiveSIs >= sg_of_su->saAmfSGMaxActiveSIsperSU) {
+ LOG_WA("Failed to increase saAmfSUNumCurrActiveSIs(%u), "
+ "saAmfSGMaxActiveSIsperSU(%u)", saAmfSUNumCurrActiveSIs,
+ sg_of_su->saAmfSGMaxActiveSIsperSU);
+ return;
+ }
saAmfSUNumCurrActiveSIs++;
- osafassert(saAmfSUNumCurrActiveSIs <= sg_of_su->saAmfSGMaxActiveSIsperSU);
TRACE("%s saAmfSUNumCurrActiveSIs=%u", name.c_str(),
saAmfSUNumCurrActiveSIs);
m_AVSV_SEND_CKPT_UPDT_ASYNC_UPDT(avd_cb, this, AVSV_CKPT_SU_SI_CURR_ACTIVE);
}
void AVD_SU::dec_curr_act_si() {
- osafassert(saAmfSUNumCurrActiveSIs > 0);
+ if (saAmfSUNumCurrActiveSIs == 0) {
+ LOG_WA("Failed to decrease saAmfSUNumCurrActiveSIs");
+ return;
+ }
saAmfSUNumCurrActiveSIs--;
TRACE("%s saAmfSUNumCurrActiveSIs=%u", name.c_str(),
saAmfSUNumCurrActiveSIs);
m_AVSV_SEND_CKPT_UPDT_ASYNC_UPDT(avd_cb, this, AVSV_CKPT_SU_SI_CURR_ACTIVE);
}
void AVD_SU::inc_curr_stdby_si() {
+ if (saAmfSUNumCurrStandbySIs >= sg_of_su->saAmfSGMaxStandbySIsperSU) {
+ LOG_WA("Failed to increase saAmfSUNumCurrStandbySIs(%u), "
+ "saAmfSGMaxStandbySIsperSU(%u)", saAmfSUNumCurrStandbySIs,
+ sg_of_su->saAmfSGMaxStandbySIsperSU);
+ return;
+ }
saAmfSUNumCurrStandbySIs++;
- osafassert(saAmfSUNumCurrStandbySIs <= sg_of_su->saAmfSGMaxStandbySIsperSU);
TRACE("%s saAmfSUNumCurrStandbySIs=%u", name.c_str(),
saAmfSUNumCurrStandbySIs);
m_AVSV_SEND_CKPT_UPDT_ASYNC_UPDT(avd_cb, this, AVSV_CKPT_SU_SI_CURR_STBY);
}
void AVD_SU::dec_curr_stdby_si() {
- osafassert(saAmfSUNumCurrStandbySIs > 0);
+ if (saAmfSUNumCurrStandbySIs == 0) {
+ LOG_WA("Failed to decrease saAmfSUNumCurrStandbySIs");
+ return;
+ }
saAmfSUNumCurrStandbySIs--;
TRACE("%s saAmfSUNumCurrStandbySIs=%u", name.c_str(),
saAmfSUNumCurrStandbySIs);
@@ -2527,6 +2543,22 @@ uint32_t AVD_SU::curr_num_active_sis() {
}
/**
+ * @brief Count number of assignment belonging to *this* SU object
+ * that has @ha and @fsm.
+ * @param ha: HA state of searching object
+ * fsm: fsm state of searching object
+ * @return count
+ */
+uint32_t AVD_SU::count_susi_with(SaAmfHAStateT ha, uint32_t fsm) {
+ uint32_t count = 0;
+ for (AVD_SU_SI_REL *susi = list_of_susi; susi != nullptr;
+ susi = susi->su_next)
+ if ((susi->state == ha) && (susi->fsm == fsm))
+ count++;
+ return count;
+}
+
+/**
* @brief This function completes admin operation on SU.
* It responds IMM with the result of admin operation on SU.
* @param ptr to su
@@ -2732,4 +2764,4 @@ bool AVD_SU::restrict_auto_repair() const
}
return false;
-}
\ No newline at end of file
+}
diff --git a/src/amf/amfd/su.h b/src/amf/amfd/su.h
index 9fece7a13..96c6b803f 100644
--- a/src/amf/amfd/su.h
+++ b/src/amf/amfd/su.h
@@ -143,6 +143,7 @@ class AVD_SU {
SaAisErrorT check_su_stability();
uint32_t curr_num_standby_sis();
uint32_t curr_num_active_sis();
+ uint32_t count_susi_with(SaAmfHAStateT ha, uint32_t fsm);
bool su_any_comp_undergoing_restart_admin_op();
AVD_COMP *su_get_comp_undergoing_restart_admin_op();
bool su_all_comps_restartable();
--
2.11.0
------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel