Hi Praveen, I attached them to the ticket.
Thanks, Minh On 19/08/16 21:08, praveen malviya wrote: > Hi Minh, > All patches are not received. > Please attach them to the ticket. > > Thanks, > Praveen > > On 18-Aug-16 5:45 AM, Minh Hon Chau wrote: >> osaf/services/saf/amf/amfd/include/sg.h | 4 +- >> osaf/services/saf/amf/amfd/include/susi.h | 2 + >> osaf/services/saf/amf/amfd/ndfsm.cc | 15 ++++++- >> osaf/services/saf/amf/amfd/sg.cc | 37 ++++++++++++++++++- >> osaf/services/saf/amf/amfd/siass.cc | 59 >> +++++++++++++++++++++++++++++- >> osaf/services/saf/amf/amfd/su.cc | 12 ++++++ >> 6 files changed, 121 insertions(+), 8 deletions(-) >> >> >> Since a headless interruption is an unplanned action and writing RTAs to IMM >> is currently queued up in the AMFD implementation, this can result in >> inappropriate states of SG fsm state, SUSI fsm state, ha state, >> SUOperationList, etc. Eventually, AMFD will run into an unstable SG, a false >> assertion, or even SUSIs that become permanently PARTIALLY, which is hard >> to debug (even harder without trace). >> >> This patch adds a validation routine to check headless cached RTAs read >> from IMM; more validation rules are to be added. Also, a TODO is left for >> discussion about what action should be taken if validation >> fails. 
>> >> diff --git a/osaf/services/saf/amf/amfd/include/sg.h >> b/osaf/services/saf/amf/amfd/include/sg.h >> --- a/osaf/services/saf/amf/amfd/include/sg.h >> +++ b/osaf/services/saf/amf/amfd/include/sg.h >> @@ -418,7 +418,7 @@ public: >> bool any_assignment_absent(); >> void failover_absent_assignment(); >> bool ng_using_saAmfSGAdminState; >> - >> + bool headless_validation; >> uint32_t term_su_list_in_reverse(); >> //Runtime calculates value of saAmfSGNumCurrAssignedSUs; >> uint32_t curr_assigned_sus() const; >> @@ -579,7 +579,7 @@ private: >> #define m_AVD_CHK_OPLIST(i_su,flag) (flag) = >> (i_su)->sg_of_su->in_su_oper_list(i_su) >> >> void avd_sg_read_headless_cached_rta(AVD_CL_CB *cb); >> - >> +bool avd_sg_validate_headless_cached_rta(AVD_CL_CB *cb); >> extern void avd_sg_delete(AVD_SG *sg); >> extern void avd_sg_db_add(AVD_SG *sg); >> extern void avd_sg_db_remove(AVD_SG *sg); >> diff --git a/osaf/services/saf/amf/amfd/include/susi.h >> b/osaf/services/saf/amf/amfd/include/susi.h >> --- a/osaf/services/saf/amf/amfd/include/susi.h >> +++ b/osaf/services/saf/amf/amfd/include/susi.h >> @@ -143,6 +143,8 @@ AVD_SU_SI_REL *avd_susi_create(AVD_CL_CB >> AVD_SU_SI_STATE default_fsm = >> AVD_SU_SI_STATE_ABSENT); >> AVD_SU_SI_REL *avd_susi_find(AVD_CL_CB *cb, const SaNameT *su_name, >> const SaNameT *si_name); >> void avd_susi_update_fsm(AVD_SU_SI_REL *susi, AVD_SU_SI_STATE >> new_fsm_state); >> +bool avd_susi_validate_headless_cached_rta(AVD_SU_SI_REL *present_susi, >> + SaAmfHAStateT ha_fr_imm, AVD_SU_SI_STATE fsm_fr_imm); >> void avd_susi_read_headless_cached_rta(AVD_CL_CB *cb); >> extern void avd_susi_update(AVD_SU_SI_REL *susi, SaAmfHAStateT >> ha_state); >> >> diff --git a/osaf/services/saf/amf/amfd/ndfsm.cc >> b/osaf/services/saf/amf/amfd/ndfsm.cc >> --- a/osaf/services/saf/amf/amfd/ndfsm.cc >> +++ b/osaf/services/saf/amf/amfd/ndfsm.cc >> @@ -127,13 +127,22 @@ void avd_process_state_info_queue(AVD_CL >> >> // Read cached rta from Imm, the order of calling >> // below 
functions is IMPORTANT. >> - // Reading sg must be after reading susi >> - // Cleanup compcsi must be after reading sg >> if (found_state_info == true) { >> + LOG_NO("Enter restore headless cached RTAs from IMM"); >> + // Read all cached susi, includes ABSENT SUSI with IMM fsm >> state >> avd_susi_read_headless_cached_rta(cb); >> + // Read SUSwitch of SU, validate toggle depends on SUSI fsm >> state >> + avd_su_read_headless_cached_rta(cb); >> + // Read SUOperationList, set ABSENT fsm state for ABSENT SUSI >> avd_sg_read_headless_cached_rta(cb); >> + // Clean compcsi object of ABSENT SUSI >> avd_compcsi_cleanup_imm_object(cb); >> - avd_su_read_headless_cached_rta(cb); >> + // Last, validate all >> + bool valid = avd_sg_validate_headless_cached_rta(cb); >> + if (valid) >> + LOG_NO("Leave reading headless cached RTAs from IMM: >> SUCCESS"); >> + else >> + LOG_ER("Leave reading headless cached RTAs from IMM: >> FAILED"); >> } >> done: >> TRACE("queue_size after processing: %lu", (unsigned long) >> cb->evt_queue.size()); >> diff --git a/osaf/services/saf/amf/amfd/sg.cc >> b/osaf/services/saf/amf/amfd/sg.cc >> --- a/osaf/services/saf/amf/amfd/sg.cc >> +++ b/osaf/services/saf/amf/amfd/sg.cc >> @@ -124,7 +124,8 @@ AVD_SG::AVD_SG(): >> max_assigned_su(nullptr), >> min_assigned_su(nullptr), >> si_tobe_redistributed(nullptr), >> - try_inst_counter(0) >> + try_inst_counter(0), >> + headless_validation(true) >> { >> adminOp = static_cast<SaAmfAdminOperationIdT>(0); >> memset(&name, 0, sizeof(SaNameT)); >> @@ -2115,6 +2116,9 @@ void avd_sg_read_headless_cached_rta(AVD >> (SaImmAttrValuesT_2 ***)&attributes)) == >> SA_AIS_OK) { >> sg = sg_db->find(Amf::to_string(&sg_dn)); >> if (sg && sg->sg_ncs_spec == false) { >> + if (sg->headless_validation == false) { >> + continue; >> + } >> // Read sg fsm state >> rc = >> immutil_getAttr(const_cast<SaImmAttrNameT>("osafAmfSGFsmState"), >> attributes, 0, &imm_sg_fsm_state); >> @@ -2159,6 +2163,37 @@ done: >> TRACE_LEAVE(); >> } >> >> +/** 
>> + * @brief Validate all cached RTAs read from IMM after headless. >> + This validation is necessary. If AMFD doesn't have this >> + validation routine and the cached RTAs are invalid, >> + that would lead into *unpredictably* wrong states, which >> + is hard to debug (harder if no trace) >> + * @param Control block (AVD_CL_CB). >> + * @Return true if valid, false otherwise. >> +*/ >> +bool avd_sg_validate_headless_cached_rta(AVD_CL_CB *cb) { >> + TRACE_ENTER(); >> + bool valid = true; >> + for (std::map<std::string, AVD_SG*>::const_iterator it = >> sg_db->begin(); >> + it != sg_db->end(); it++) { >> + AVD_SG *i_sg = it->second; >> + if (i_sg->sg_ncs_spec == true) { >> + continue; >> + } >> + >> + if (i_sg->headless_validation == false) { >> + //TODO: AMFD should make all SUs of this SG faulty to >> remove >> + //all assignments, clean up IMM headless cached RTA. >> + //Just assert for now >> + //osafassert(false); >> + valid = false; >> + } >> + } >> + TRACE_LEAVE2("%u", valid); >> + return valid; >> +} >> + >> void AVD_SG::failover_absent_assignment() { >> >> TRACE_ENTER2("SG:'%s'", Amf::to_string(&name).c_str()); >> diff --git a/osaf/services/saf/amf/amfd/siass.cc >> b/osaf/services/saf/amf/amfd/siass.cc >> --- a/osaf/services/saf/amf/amfd/siass.cc >> +++ b/osaf/services/saf/amf/amfd/siass.cc >> @@ -214,11 +214,17 @@ void avd_susi_read_headless_cached_rta(A >> susi = avd_su_susi_find(cb, su, &si->name); >> rc = immutil_getAttr("osafAmfSISUFsmState", attributes, 0, >> &imm_susi_fsm); >> osafassert(rc == SA_AIS_OK); >> + rc = immutil_getAttr("saAmfSISUHAState", attributes, 0, >> &imm_ha_state); >> + osafassert(rc == SA_AIS_OK); >> >> if (susi) { // FOR PRESENT SUSI found in AMFND(s) >> TRACE("SISU:'%s', old(imm) fsm state: %d, new(sync) fsm >> state: %d", >> Amf::to_string(&dn).c_str(), imm_susi_fsm, susi->fsm); >> >> + if (avd_susi_validate_headless_cached_rta(susi, >> imm_ha_state, >> + imm_susi_fsm) == false) { >> + continue; >> + } >> #if 1 >> // If remove 
the below line in this #if block, AMFD will >> use >> // the synced fsm state, which is latest. That means, in >> @@ -255,8 +261,6 @@ void avd_susi_read_headless_cached_rta(A >> >> } else { // For ABSENT SUSI >> if (su->sg_of_su->sg_ncs_spec == false) { >> - rc = immutil_getAttr("saAmfSISUHAState", attributes, >> 0, &imm_ha_state); >> - osafassert(rc == SA_AIS_OK); >> TRACE("Absent SUSI, ha_state:'%u', fsm_state:'%u'", >> imm_ha_state, imm_susi_fsm); >> if (imm_susi_fsm != AVD_SU_SI_STATE_UNASGN) { >> absent_susi = avd_susi_create(avd_cb, si, su, >> imm_ha_state, false, AVSV_SUSI_ACT_BASE); >> @@ -288,6 +292,57 @@ void avd_susi_read_headless_cached_rta(A >> done: >> TRACE_LEAVE(); >> } >> +/** >> + * Validate cached RTA read from IMM >> + * @param present_susi >> + * @param ha_fr_imm: Ha state of @present_susi read from IMM >> + * @param fsm_fr_imm: Fsm state of @present susi read from IMM >> + * @return: true of valid, false otherwise >> + */ >> +bool avd_susi_validate_headless_cached_rta(AVD_SU_SI_REL *present_susi, >> + SaAmfHAStateT ha_fr_imm, AVD_SU_SI_STATE fsm_fr_imm) { >> + std::string dn = Amf::to_string(&present_susi->si->name) + "," + >> + Amf::to_string(&present_susi->su->name); >> + TRACE_ENTER2("SISU:'%s'", dn.c_str()); >> + bool valid = true; >> + // rule 1: valid ha state >> + if (ha_fr_imm != present_susi->state) { >> + if (ha_fr_imm == SA_AMF_HA_QUIESCING || >> + ha_fr_imm == SA_AMF_HA_QUIESCED) { >> + // That's fine >> + ; >> + } else { >> + LOG_ER("SISU:'%s', old(imm) ha state: %d, new(sync) ha >> state: %d", >> + dn.c_str(), ha_fr_imm, present_susi->state); >> + valid = false; >> + goto done; >> + } >> + } >> + // rule 2: if ha_fr_imm is QUIESCING, one of relevant entities must >> + // have adminState is SHUTTINGDOWN >> + if (ha_fr_imm == SA_AMF_HA_QUIESCING) { >> + if (present_susi->su->saAmfSUAdminState == >> SA_AMF_ADMIN_SHUTTING_DOWN || >> + present_susi->si->saAmfSIAdminState == >> SA_AMF_ADMIN_SHUTTING_DOWN || >> + 
present_susi->su->sg_of_su->saAmfSGAdminState == >> SA_AMF_ADMIN_SHUTTING_DOWN || >> + present_susi->su->su_on_node->saAmfNodeAdminState == >> SA_AMF_ADMIN_SHUTTING_DOWN) { >> + // That's fine >> + ; >> + } else { >> + LOG_ER("SISU:'%s', ha:'%u', but one of [node/sg/su/si] >> is not in SHUTTING_DOWN", >> + dn.c_str(), ha_fr_imm); >> + valid = false; >> + goto done; >> + } >> + } >> + // TODO: more rules to be added when issue is found in reality >> due to writing >> + // cached RTA to IMM >> +done: >> + if (valid == false) >> + present_susi->su->sg_of_su->headless_validation = valid; >> + >> + TRACE_LEAVE2("%u, %u", valid, >> present_susi->su->sg_of_su->headless_validation); >> + return present_susi->su->sg_of_su->headless_validation; >> +} >> >> /***************************************************************************** >> >> >> * Function: avd_susi_create >> * >> diff --git a/osaf/services/saf/amf/amfd/su.cc >> b/osaf/services/saf/amf/amfd/su.cc >> --- a/osaf/services/saf/amf/amfd/su.cc >> +++ b/osaf/services/saf/amf/amfd/su.cc >> @@ -1964,6 +1964,18 @@ void avd_su_read_headless_cached_rta(AVD >> rc = >> immutil_getAttr(const_cast<SaImmAttrNameT>("osafAmfSUSwitch"), >> attributes, 0, &su_toggle); >> osafassert(rc == SA_AIS_OK); >> + if (su_toggle == AVSV_SI_TOGGLE_SWITCH) { >> + // 2N, if toggle but no pending assignment -> bad state >> + if (su->sg_of_su->sg_redundancy_model == >> SA_AMF_2N_REDUNDANCY_MODEL && >> + su->sg_of_su->any_assignment_in_progress() == false){ >> + LOG_ER("SG'%s', osafAmfSUSwitch:'%u', but no >> pending assignment", >> + Amf::to_string(&su->sg_of_su->name).c_str(), >> + su_toggle); >> + su->sg_of_su->headless_validation = false; >> + } >> + if (su->sg_of_su->headless_validation == false) >> + continue; >> + } >> su->set_su_switch(su_toggle, false); >> } >> } >> > ------------------------------------------------------------------------------ _______________________________________________ Opensaf-devel mailing list 
Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel