src/amf/amfd/sg_nway_fsm.cc | 192 +++++++++++++++++++++++++++++++------------ src/amf/amfd/su.cc | 11 ++ src/amf/amfd/su.h | 1 + 3 files changed, 151 insertions(+), 53 deletions(-)
During a lock/shutdown operation on an SU, Node or NG, AMFD sometimes does not remove quiesced assignments. The ticket reports this problem both with sidep enabled and with it disabled. The issue occurs in a specific situation: AMFD is unable to switch over all the assignments of the locked SU, and AMFD removes the locked SU from the oper_list before all of its assignments have been removed. Also, in the REALIGN state, the case of sending removal of assignments to the quiesced SU when the new active SU is in the oper_list is not handled. The assignment distribution is as follows: SI1 and SI2 active in SU1 and SI3 standby in SU1. SI1 and SI2 standby in SU3 and SI3 active in SU2. During the SU1 lock, AMFD sends the quiesced HA state for SI1 and SI2 to SU1, adds SU1 to the oper_list and marks the SG as SU_OPER. After SI1 gets quiesced, AMFD sends active for it to SU2. Before SI1 becomes active in SU2, SI2 becomes quiesced. SU2 cannot take a further active assignment because of a configuration limitation. AMFD sends removal of SI2 to both SU1 and SU2. AMFD gets the removal response from SU1 for SI2, removes SU1 from the oper_list and marks the SG as being in the REALIGN state. When SI1 becomes active in SU2, AMFD does not remove the quiesced assignment from SU1, as this case is not handled. This patch fixes the problem by sending removal of the quiesced assignment when SIs become active in other SUs and SI dep is not configured. Note: This patch does not cover the SI dep issue. diff --git a/src/amf/amfd/sg_nway_fsm.cc b/src/amf/amfd/sg_nway_fsm.cc --- a/src/amf/amfd/sg_nway_fsm.cc +++ b/src/amf/amfd/sg_nway_fsm.cc @@ -45,7 +45,9 @@ static void avd_sg_nway_node_fail_sg_rea static AVD_SU_SI_REL * find_pref_standby_susi(AVD_SU_SI_REL *sisu); /* macro to determine if all the active sis assigned to a - particular su have been successfully engaged by the standby */ + particular su have been successfully engaged by the standby + TODO: Replace with avd_sg_nway_are_stdby_sus_engaged(). 
+ */ #define m_AVD_SG_NWAY_ARE_STDBY_SUS_ENGAGED(su, susi, is) \ { \ AVD_SU_SI_REL *csusi = 0, *csisu = 0; \ @@ -74,6 +76,95 @@ static AVD_SU_SI_REL * find_pref_standby } \ }; +/* + * @brief Function equivalent of above macro to determine if all the + * active sis assigned to a particular su have been successfully + * engaged by the standby + * + * @param[in] ptr to SU. + * @param[in] ptr to SUSI. + * + * @return true/false. + */ +bool avd_sg_nway_are_stdby_sus_engaged(AVD_SU *su, AVD_SU_SI_REL *susi) { + TRACE_ENTER(); + AVD_SU_SI_REL *csusi = nullptr, *csisu = nullptr; + bool is_si_eng; + bool ret = true; + for (csusi = su->list_of_susi; csusi; csusi = csusi->su_next) { + if ((csusi == susi) || (SA_AMF_HA_STANDBY == csusi->state)) + continue; + /* + Case1:If csusi is going active, quiesced or quiescing modification in this SU + then wait for its completion. + */ + if ((SA_AMF_HA_ACTIVE == csusi->state) || + (SA_AMF_HA_QUIESCING == csusi->state) || + ((SA_AMF_HA_QUIESCED == csusi->state) && + (AVD_SU_SI_STATE_MODIFY == csusi->fsm))) { + ret = false; + break; + } + //Case2: Check if csusi is Active in any other SU. + is_si_eng = false; + for (csisu = csusi->si->list_of_sisu; csisu; csisu = csisu->si_next) { + if (SA_AMF_HA_ACTIVE == csisu->state) + break; + } + /* + -If csisu is not nullptr then it cannot be in this SU otherwise + we would have returned from case1. + -If csisu is nullptr then it is either STANDBY or + (Quiesced, fsm = Assigned). In both of these cases it is eligible + for deletion as its failover/switchover cannot be done. + Note: In future, for case of SI deps, csisu can be in quiesced, assigned state + because failover of sponsor in this SU is going on. + -Check if csisu has already become active for this csisu->SI in + some other SU. If it is so that means standby SUSI for SI + is already engaged. 
+ */ + if (!csisu || (csisu && (AVD_SU_SI_STATE_ASGND == csisu->fsm))) + is_si_eng = true; + if (false == is_si_eng) { + ret = false; + break; + } + } + TRACE_LEAVE2("ret:%u",ret); + return ret; +} + +/** + * @brief Checks if SUSIs are in either quiesced (ignores standby HA state). + * @return true/false + */ +static bool all_active_susis_are_quieced(const AVD_SU *su) { + AVD_SU_SI_REL *susi; + for (susi = su->list_of_susi; susi; susi = susi->su_next) { + if (susi->state == SA_AMF_HA_STANDBY) + continue; + if ((susi->state == SA_AMF_HA_QUIESCING) || + (susi->state == SA_AMF_HA_ACTIVE)) + return false; + //check if quiesced modification is completed or not. + if ((susi->state == SA_AMF_HA_QUIESCED) && + (susi->fsm == AVD_SU_SI_STATE_MODIFY)) + return false; + } + return true; +} + +static void su_or_node_shutting_down_to_locked(AVD_SU *su) { + if ((su->saAmfSUAdminState == SA_AMF_ADMIN_SHUTTING_DOWN) && + ((all_active_susis_are_quieced(su) == true) || + (su->all_susis_fsm_state(AVD_SU_SI_STATE_UNASGN) == true))) { + su->set_admin_state(SA_AMF_ADMIN_LOCKED); + } else if (su->su_on_node->saAmfNodeAdminState == SA_AMF_ADMIN_SHUTTING_DOWN) { + if (su->su_on_node->is_node_lock() == true) + node_admin_state_set(su->su_on_node, SA_AMF_ADMIN_LOCKED); + } +} + uint32_t SG_NWAY::si_assign(AVD_CL_CB *cb, AVD_SI *si) { uint32_t rc = NCSCC_RC_SUCCESS; @@ -2209,6 +2300,7 @@ uint32_t avd_sg_nway_susi_succ_sg_realig } else { /* check if su is present in the su-oper list */ m_AVD_CHK_OPLIST(su, is_su_present); + TRACE("is_su_present:%u",is_su_present); if (is_su_present) { /* if other susis of this SU are assigned, remove it from the su-oper list */ for (curr_susi = su->list_of_susi; @@ -2217,43 +2309,28 @@ uint32_t avd_sg_nway_susi_succ_sg_realig if (!curr_susi) avd_sg_su_oper_list_del(cb, su, false); + } else { if ((susi->state == SA_AMF_HA_ACTIVE) && (susi->si->num_dependents > 0)) { avd_sidep_send_active_to_dependents(susi->si); } - - /* identify the quiesced susi assignment */ 
- for (curr_susi = susi->si->list_of_sisu; - curr_susi && (SA_AMF_HA_QUIESCED != curr_susi->state); - curr_susi = curr_susi->si_next) ; - - if (curr_susi) { - /* the corresponding su should be in the su-oper list */ - m_AVD_CHK_OPLIST(curr_susi->su, is_su_present); - if (is_su_present) { - /* determine if all the standby sus are engaged */ - m_AVD_SG_NWAY_ARE_STDBY_SUS_ENGAGED(curr_susi->su, 0, is_eng); - if (true == is_eng) { - /* send remove all msg for all sis for this su */ - rc = avd_sg_su_si_del_snd(cb, curr_susi->su); - if (NCSCC_RC_SUCCESS != rc) { - LOG_ER("%s:%u: %s (%zu)", __FILE__, __LINE__, curr_susi->su->name.c_str(), - curr_susi->su->name.length()); - goto done; - } - - su_node_ptr = su->get_node_ptr(); - - /* if su-shutdown semantics in progress, mark it locked */ - if (su->saAmfSUAdminState == SA_AMF_ADMIN_SHUTTING_DOWN) { - su->set_admin_state(SA_AMF_ADMIN_LOCKED); - } else if (su_node_ptr->saAmfNodeAdminState == - SA_AMF_ADMIN_SHUTTING_DOWN) { - if (su_node_ptr->is_node_lock() == true) { - node_admin_state_set(su_node_ptr, - SA_AMF_ADMIN_LOCKED); - } - } + } + /* If this active assignment was part of switchover then identify + the quiesced susi assignment and send removal to it. 
+ */ + for (curr_susi = susi->si->list_of_sisu; + curr_susi && (SA_AMF_HA_QUIESCED != curr_susi->state); + curr_susi = curr_susi->si_next) ; + if (curr_susi) { + m_AVD_CHK_OPLIST(curr_susi->su, is_su_present); + if (is_su_present == true) { + is_eng = avd_sg_nway_are_stdby_sus_engaged(curr_susi->su, nullptr); + if ((is_eng == true) && + (curr_susi->su->saAmfSuReadinessState == SA_AMF_READINESS_OUT_OF_SERVICE)) { + rc = avd_sg_su_si_del_snd(cb, curr_susi->su); + if (NCSCC_RC_SUCCESS != rc) { + LOG_ER("Send for deletion failed for '%s'", curr_susi->su->name.c_str()); + goto done; } } } @@ -2581,17 +2658,6 @@ uint32_t SG_NWAY::susi_success_su_oper(A goto done; } - su_node_ptr = su->get_node_ptr(); - - /* if su-shutdown semantics in progress, mark it locked */ - if (su->saAmfSUAdminState == SA_AMF_ADMIN_SHUTTING_DOWN) { - su->set_admin_state(SA_AMF_ADMIN_LOCKED); - } else if (su_node_ptr->saAmfNodeAdminState == SA_AMF_ADMIN_SHUTTING_DOWN) { - if (su_node_ptr->is_node_lock() == true) { - node_admin_state_set(su_node_ptr, SA_AMF_ADMIN_LOCKED); - } - } - /* transition to sg-realign state */ m_AVD_SET_SG_FSM(cb, sg, AVD_SG_FSM_SG_REALIGN); } @@ -2600,6 +2666,7 @@ uint32_t SG_NWAY::susi_success_su_oper(A */ susi->si->delete_assignments(cb); } + su_or_node_shutting_down_to_locked(su); } else if (susi && (SA_AMF_HA_ACTIVE == state) && (AVSV_SUSI_ACT_DEL != act)) { /* => single active assignment success */ @@ -2674,15 +2741,34 @@ uint32_t SG_NWAY::susi_success_su_oper(A avd_sg_nway_si_assign(cb, sg); } else if (susi && (AVSV_SUSI_ACT_DEL == act)) { /* Single susi delete success processing */ - - /* delete the su from the oper list */ - avd_sg_su_oper_list_del(cb, su, false); - + AVD_SU *su = susi->su; + if ((su->saAmfSUAdminState == SA_AMF_ADMIN_LOCKED) || + (su->su_on_node->saAmfNodeAdminState == SA_AMF_ADMIN_LOCKED)) { + /* determine if all the standby sus are engaged */ + is_eng = avd_sg_nway_are_stdby_sus_engaged(su, nullptr); + if (is_eng == true) { + /* + Deletion for 
this SUSI could have been sent because this SI cannot + be switch-overed/fail-overeed while trying to failover all SUSIs of this SU. + If all SUSIs of this SU are now engaged to their respective actives then + send SU level removal. + */ + rc = avd_sg_su_si_del_snd(cb, su); + if (NCSCC_RC_SUCCESS != rc) { + LOG_ER("Send for deletion failed for '%s'", su->name.c_str()); + goto done; + } + avd_sg_su_oper_list_add(cb, su, false); + m_AVD_SET_SG_FSM(cb, sg, AVD_SG_FSM_SG_REALIGN); + } + } else { + avd_sg_su_oper_list_del(cb, su, false); + } /* free all the CSI assignments */ - avd_compcsi_delete(cb, susi, false); - - /* free susi assignment */ - m_AVD_SU_SI_TRG_DEL(cb, susi); + avd_compcsi_delete(cb, susi, false); + + /* free susi assignment */ + m_AVD_SU_SI_TRG_DEL(cb, susi); /* transition to sg-realign state or initiate si assignments */ diff --git a/src/amf/amfd/su.cc b/src/amf/amfd/su.cc --- a/src/amf/amfd/su.cc +++ b/src/amf/amfd/su.cc @@ -2618,3 +2618,14 @@ void AVD_SU::set_surestart(bool value) bool AVD_SU::get_surestart() const { return surestart; } +/** + * @brief Checks if all SIs assigned to this SU are in same fsm state. + * @return true/false + */ +bool AVD_SU::all_susis_fsm_state(uint32_t fsm) const { + for (AVD_SU_SI_REL *susi = list_of_susi; susi; susi = susi->su_next) { + if (susi->fsm != fsm) + return false; + } + return true; +} diff --git a/src/amf/amfd/su.h b/src/amf/amfd/su.h --- a/src/amf/amfd/su.h +++ b/src/amf/amfd/su.h @@ -147,6 +147,7 @@ class AVD_SU { bool all_comps_in_presence_state(SaAmfPresenceStateT pres) const; void set_surestart(bool state); bool get_surestart() const; + bool all_susis_fsm_state(uint32_t fsm) const; private: void initialize(); ------------------------------------------------------------------------------ Check out the vibrant tech community on one of the world's most engaging tech sites, SlashDot.org! 
http://sdm.link/slashdot _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel