Hi Minh,
All patches are not received.
Please attached them in the ticket.
Thanks,
Praveen
On 18-Aug-16 5:45 AM, Minh Hon Chau wrote:
> osaf/services/saf/amf/amfd/include/sg.h | 4 +-
> osaf/services/saf/amf/amfd/include/susi.h | 2 +
> osaf/services/saf/amf/amfd/ndfsm.cc | 15 ++++++-
> osaf/services/saf/amf/amfd/sg.cc | 37 ++++++++++++++++++-
> osaf/services/saf/amf/amfd/siass.cc | 59
> +++++++++++++++++++++++++++++-
> osaf/services/saf/amf/amfd/su.cc | 12 ++++++
> 6 files changed, 121 insertions(+), 8 deletions(-)
>
>
> Since headless interuption is unplanned action and writing rta to IMM
> is currently queued up in AMFD implemenentation. That can result into
> inappropriate states of SG fsm state, SUSI fsm state, ha state,
> SUOperationList, etc. Eventually, AMFD will run into SG unstable, false
> assertion, or even SUSIs become permanently PARTIALLY, which is hard
> to debug (even harder without trace)
>
> This patch adds a validation routine to check headless cached RTAs read
> from IMM, more validation rule to be added. Also, a TODO is left for
> discussion about what's a action should be taken if validation is failed.
>
> diff --git a/osaf/services/saf/amf/amfd/include/sg.h
> b/osaf/services/saf/amf/amfd/include/sg.h
> --- a/osaf/services/saf/amf/amfd/include/sg.h
> +++ b/osaf/services/saf/amf/amfd/include/sg.h
> @@ -418,7 +418,7 @@ public:
> bool any_assignment_absent();
> void failover_absent_assignment();
> bool ng_using_saAmfSGAdminState;
> -
> + bool headless_validation;
> uint32_t term_su_list_in_reverse();
> //Runtime calculates value of saAmfSGNumCurrAssignedSUs;
> uint32_t curr_assigned_sus() const;
> @@ -579,7 +579,7 @@ private:
> #define m_AVD_CHK_OPLIST(i_su,flag) (flag) =
> (i_su)->sg_of_su->in_su_oper_list(i_su)
>
> void avd_sg_read_headless_cached_rta(AVD_CL_CB *cb);
> -
> +bool avd_sg_validate_headless_cached_rta(AVD_CL_CB *cb);
> extern void avd_sg_delete(AVD_SG *sg);
> extern void avd_sg_db_add(AVD_SG *sg);
> extern void avd_sg_db_remove(AVD_SG *sg);
> diff --git a/osaf/services/saf/amf/amfd/include/susi.h
> b/osaf/services/saf/amf/amfd/include/susi.h
> --- a/osaf/services/saf/amf/amfd/include/susi.h
> +++ b/osaf/services/saf/amf/amfd/include/susi.h
> @@ -143,6 +143,8 @@ AVD_SU_SI_REL *avd_susi_create(AVD_CL_CB
> AVD_SU_SI_STATE default_fsm =
> AVD_SU_SI_STATE_ABSENT);
> AVD_SU_SI_REL *avd_susi_find(AVD_CL_CB *cb, const SaNameT *su_name, const
> SaNameT *si_name);
> void avd_susi_update_fsm(AVD_SU_SI_REL *susi, AVD_SU_SI_STATE new_fsm_state);
> +bool avd_susi_validate_headless_cached_rta(AVD_SU_SI_REL *present_susi,
> + SaAmfHAStateT ha_fr_imm, AVD_SU_SI_STATE fsm_fr_imm);
> void avd_susi_read_headless_cached_rta(AVD_CL_CB *cb);
> extern void avd_susi_update(AVD_SU_SI_REL *susi, SaAmfHAStateT ha_state);
>
> diff --git a/osaf/services/saf/amf/amfd/ndfsm.cc
> b/osaf/services/saf/amf/amfd/ndfsm.cc
> --- a/osaf/services/saf/amf/amfd/ndfsm.cc
> +++ b/osaf/services/saf/amf/amfd/ndfsm.cc
> @@ -127,13 +127,22 @@ void avd_process_state_info_queue(AVD_CL
>
> // Read cached rta from Imm, the order of calling
> // below functions is IMPORTANT.
> - // Reading sg must be after reading susi
> - // Cleanup compcsi must be after reading sg
> if (found_state_info == true) {
> + LOG_NO("Enter restore headless cached RTAs from IMM");
> + // Read all cached susi, includes ABSENT SUSI with IMM fsm state
> avd_susi_read_headless_cached_rta(cb);
> + // Read SUSwitch of SU, validate toggle depends on SUSI fsm
> state
> + avd_su_read_headless_cached_rta(cb);
> + // Read SUOperationList, set ABSENT fsm state for ABSENT SUSI
> avd_sg_read_headless_cached_rta(cb);
> + // Clean compcsi object of ABSENT SUSI
> avd_compcsi_cleanup_imm_object(cb);
> - avd_su_read_headless_cached_rta(cb);
> + // Last, validate all
> + bool valid = avd_sg_validate_headless_cached_rta(cb);
> + if (valid)
> + LOG_NO("Leave reading headless cached RTAs from IMM:
> SUCCESS");
> + else
> + LOG_ER("Leave reading headless cached RTAs from IMM:
> FAILED");
> }
> done:
> TRACE("queue_size after processing: %lu", (unsigned long)
> cb->evt_queue.size());
> diff --git a/osaf/services/saf/amf/amfd/sg.cc
> b/osaf/services/saf/amf/amfd/sg.cc
> --- a/osaf/services/saf/amf/amfd/sg.cc
> +++ b/osaf/services/saf/amf/amfd/sg.cc
> @@ -124,7 +124,8 @@ AVD_SG::AVD_SG():
> max_assigned_su(nullptr),
> min_assigned_su(nullptr),
> si_tobe_redistributed(nullptr),
> - try_inst_counter(0)
> + try_inst_counter(0),
> + headless_validation(true)
> {
> adminOp = static_cast<SaAmfAdminOperationIdT>(0);
> memset(&name, 0, sizeof(SaNameT));
> @@ -2115,6 +2116,9 @@ void avd_sg_read_headless_cached_rta(AVD
> (SaImmAttrValuesT_2 ***)&attributes))
> == SA_AIS_OK) {
> sg = sg_db->find(Amf::to_string(&sg_dn));
> if (sg && sg->sg_ncs_spec == false) {
> + if (sg->headless_validation == false) {
> + continue;
> + }
> // Read sg fsm state
> rc =
> immutil_getAttr(const_cast<SaImmAttrNameT>("osafAmfSGFsmState"),
> attributes, 0, &imm_sg_fsm_state);
> @@ -2159,6 +2163,37 @@ done:
> TRACE_LEAVE();
> }
>
> +/**
> + * @brief Validate all cached RTAs read from IMM after headless.
> + This validation is necessary. If AMFD doesn't have this
> + validation routine and the cached RTAs are invalid,
> + that would lead into *unpredictably* wrong states, which
> + is hard to debug (harder if no trace)
> + * @param Control block (AVD_CL_CB).
> + * @Return true if valid, false otherwise.
> +*/
> +bool avd_sg_validate_headless_cached_rta(AVD_CL_CB *cb) {
> + TRACE_ENTER();
> + bool valid = true;
> + for (std::map<std::string, AVD_SG*>::const_iterator it = sg_db->begin();
> + it != sg_db->end(); it++) {
> + AVD_SG *i_sg = it->second;
> + if (i_sg->sg_ncs_spec == true) {
> + continue;
> + }
> +
> + if (i_sg->headless_validation == false) {
> + //TODO: AMFD should make all SUs of this SG faulty to
> remove
> + //all assignments, clean up IMM headless cached RTA.
> + //Just assert for now
> + //osafassert(false);
> + valid = false;
> + }
> + }
> + TRACE_LEAVE2("%u", valid);
> + return valid;
> +}
> +
> void AVD_SG::failover_absent_assignment() {
>
> TRACE_ENTER2("SG:'%s'", Amf::to_string(&name).c_str());
> diff --git a/osaf/services/saf/amf/amfd/siass.cc
> b/osaf/services/saf/amf/amfd/siass.cc
> --- a/osaf/services/saf/amf/amfd/siass.cc
> +++ b/osaf/services/saf/amf/amfd/siass.cc
> @@ -214,11 +214,17 @@ void avd_susi_read_headless_cached_rta(A
> susi = avd_su_susi_find(cb, su, &si->name);
> rc = immutil_getAttr("osafAmfSISUFsmState", attributes, 0,
> &imm_susi_fsm);
> osafassert(rc == SA_AIS_OK);
> + rc = immutil_getAttr("saAmfSISUHAState", attributes, 0,
> &imm_ha_state);
> + osafassert(rc == SA_AIS_OK);
>
> if (susi) { // FOR PRESENT SUSI found in AMFND(s)
> TRACE("SISU:'%s', old(imm) fsm state: %d, new(sync) fsm
> state: %d",
> Amf::to_string(&dn).c_str(), imm_susi_fsm,
> susi->fsm);
>
> + if (avd_susi_validate_headless_cached_rta(susi,
> imm_ha_state,
> + imm_susi_fsm) == false) {
> + continue;
> + }
> #if 1
> // If remove the below line in this #if block, AMFD
> will use
> // the synced fsm state, which is latest. That means, in
> @@ -255,8 +261,6 @@ void avd_susi_read_headless_cached_rta(A
>
> } else { // For ABSENT SUSI
> if (su->sg_of_su->sg_ncs_spec == false) {
> - rc = immutil_getAttr("saAmfSISUHAState",
> attributes, 0, &imm_ha_state);
> - osafassert(rc == SA_AIS_OK);
> TRACE("Absent SUSI, ha_state:'%u',
> fsm_state:'%u'", imm_ha_state, imm_susi_fsm);
> if (imm_susi_fsm != AVD_SU_SI_STATE_UNASGN) {
> absent_susi = avd_susi_create(avd_cb,
> si, su, imm_ha_state, false, AVSV_SUSI_ACT_BASE);
> @@ -288,6 +292,57 @@ void avd_susi_read_headless_cached_rta(A
> done:
> TRACE_LEAVE();
> }
> +/**
> + * Validate cached RTA read from IMM
> + * @param present_susi
> + * @param ha_fr_imm: Ha state of @present_susi read from IMM
> + * @param fsm_fr_imm: Fsm state of @present susi read from IMM
> + * @return: true of valid, false otherwise
> + */
> +bool avd_susi_validate_headless_cached_rta(AVD_SU_SI_REL *present_susi,
> + SaAmfHAStateT ha_fr_imm, AVD_SU_SI_STATE fsm_fr_imm) {
> + std::string dn = Amf::to_string(&present_susi->si->name) + "," +
> + Amf::to_string(&present_susi->su->name);
> + TRACE_ENTER2("SISU:'%s'", dn.c_str());
> + bool valid = true;
> + // rule 1: valid ha state
> + if (ha_fr_imm != present_susi->state) {
> + if (ha_fr_imm == SA_AMF_HA_QUIESCING ||
> + ha_fr_imm == SA_AMF_HA_QUIESCED) {
> + // That's fine
> + ;
> + } else {
> + LOG_ER("SISU:'%s', old(imm) ha state: %d, new(sync) ha
> state: %d",
> + dn.c_str(), ha_fr_imm, present_susi->state);
> + valid = false;
> + goto done;
> + }
> + }
> + // rule 2: if ha_fr_imm is QUIESCING, one of relevant entities must
> + // have adminState is SHUTTINGDOWN
> + if (ha_fr_imm == SA_AMF_HA_QUIESCING) {
> + if (present_susi->su->saAmfSUAdminState ==
> SA_AMF_ADMIN_SHUTTING_DOWN ||
> + present_susi->si->saAmfSIAdminState ==
> SA_AMF_ADMIN_SHUTTING_DOWN ||
> + present_susi->su->sg_of_su->saAmfSGAdminState ==
> SA_AMF_ADMIN_SHUTTING_DOWN ||
> + present_susi->su->su_on_node->saAmfNodeAdminState ==
> SA_AMF_ADMIN_SHUTTING_DOWN) {
> + // That's fine
> + ;
> + } else {
> + LOG_ER("SISU:'%s', ha:'%u', but one of [node/sg/su/si]
> is not in SHUTTING_DOWN",
> + dn.c_str(), ha_fr_imm);
> + valid = false;
> + goto done;
> + }
> + }
> + // TODO: more rules to be added when issue is found in reality due to
> writing
> + // cached RTA to IMM
> +done:
> + if (valid == false)
> + present_susi->su->sg_of_su->headless_validation = valid;
> +
> + TRACE_LEAVE2("%u, %u", valid,
> present_susi->su->sg_of_su->headless_validation);
> + return present_susi->su->sg_of_su->headless_validation;
> +}
>
> /*****************************************************************************
> * Function: avd_susi_create
> *
> diff --git a/osaf/services/saf/amf/amfd/su.cc
> b/osaf/services/saf/amf/amfd/su.cc
> --- a/osaf/services/saf/amf/amfd/su.cc
> +++ b/osaf/services/saf/amf/amfd/su.cc
> @@ -1964,6 +1964,18 @@ void avd_su_read_headless_cached_rta(AVD
> rc =
> immutil_getAttr(const_cast<SaImmAttrNameT>("osafAmfSUSwitch"),
> attributes, 0, &su_toggle);
> osafassert(rc == SA_AIS_OK);
> + if (su_toggle == AVSV_SI_TOGGLE_SWITCH) {
> + // 2N, if toggle but no pending assignment ->
> bad state
> + if (su->sg_of_su->sg_redundancy_model ==
> SA_AMF_2N_REDUNDANCY_MODEL &&
> +
> su->sg_of_su->any_assignment_in_progress() == false){
> + LOG_ER("SG'%s', osafAmfSUSwitch:'%u',
> but no pending assignment",
> +
> Amf::to_string(&su->sg_of_su->name).c_str(),
> + su_toggle);
> + su->sg_of_su->headless_validation =
> false;
> + }
> + if (su->sg_of_su->headless_validation == false)
> + continue;
> + }
> su->set_su_switch(su_toggle, false);
> }
> }
>
------------------------------------------------------------------------------
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel