osaf/services/saf/amf/amfd/include/sg.h   |   4 +-
 osaf/services/saf/amf/amfd/include/susi.h |   2 +
 osaf/services/saf/amf/amfd/ndfsm.cc       |  15 ++++++-
 osaf/services/saf/amf/amfd/sg.cc          |  37 ++++++++++++++++++-
 osaf/services/saf/amf/amfd/siass.cc       |  59 +++++++++++++++++++++++++++++-
 osaf/services/saf/amf/amfd/su.cc          |  12 ++++++
 6 files changed, 121 insertions(+), 8 deletions(-)


Since headless interuption is unplanned action and writing rta to IMM
is currently queued up in AMFD implemenentation. That can result into
inappropriate states of SG fsm state, SUSI fsm state, ha state,
SUOperationList, etc. Eventually, AMFD will run into SG unstable, false
assertion, or even SUSIs become permanently PARTIALLY, which is hard
to debug (even harder without trace)

This patch adds a validation routine to check headless cached RTAs read
from IMM, more validation rule to be added. Also, a TODO is left for
discussion about what's a action should be taken if validation is failed.

diff --git a/osaf/services/saf/amf/amfd/include/sg.h 
b/osaf/services/saf/amf/amfd/include/sg.h
--- a/osaf/services/saf/amf/amfd/include/sg.h
+++ b/osaf/services/saf/amf/amfd/include/sg.h
@@ -418,7 +418,7 @@ public:
        bool any_assignment_absent();
        void failover_absent_assignment();
        bool ng_using_saAmfSGAdminState;
-       
+       bool headless_validation;
        uint32_t term_su_list_in_reverse();
        //Runtime calculates value of saAmfSGNumCurrAssignedSUs;
        uint32_t curr_assigned_sus() const;
@@ -579,7 +579,7 @@ private:
 #define m_AVD_CHK_OPLIST(i_su,flag) (flag) = 
(i_su)->sg_of_su->in_su_oper_list(i_su)
 
 void avd_sg_read_headless_cached_rta(AVD_CL_CB *cb);
-
+bool avd_sg_validate_headless_cached_rta(AVD_CL_CB *cb);
 extern void avd_sg_delete(AVD_SG *sg);
 extern void avd_sg_db_add(AVD_SG *sg);
 extern void avd_sg_db_remove(AVD_SG *sg);
diff --git a/osaf/services/saf/amf/amfd/include/susi.h 
b/osaf/services/saf/amf/amfd/include/susi.h
--- a/osaf/services/saf/amf/amfd/include/susi.h
+++ b/osaf/services/saf/amf/amfd/include/susi.h
@@ -143,6 +143,8 @@ AVD_SU_SI_REL *avd_susi_create(AVD_CL_CB
                                                AVD_SU_SI_STATE default_fsm = 
AVD_SU_SI_STATE_ABSENT);
 AVD_SU_SI_REL *avd_susi_find(AVD_CL_CB *cb, const SaNameT *su_name, const 
SaNameT *si_name);
 void avd_susi_update_fsm(AVD_SU_SI_REL *susi, AVD_SU_SI_STATE new_fsm_state);
+bool avd_susi_validate_headless_cached_rta(AVD_SU_SI_REL *present_susi,
+               SaAmfHAStateT ha_fr_imm, AVD_SU_SI_STATE fsm_fr_imm);
 void avd_susi_read_headless_cached_rta(AVD_CL_CB *cb);
 extern void avd_susi_update(AVD_SU_SI_REL *susi, SaAmfHAStateT ha_state);
 
diff --git a/osaf/services/saf/amf/amfd/ndfsm.cc 
b/osaf/services/saf/amf/amfd/ndfsm.cc
--- a/osaf/services/saf/amf/amfd/ndfsm.cc
+++ b/osaf/services/saf/amf/amfd/ndfsm.cc
@@ -127,13 +127,22 @@ void avd_process_state_info_queue(AVD_CL
 
        // Read cached rta from Imm, the order of calling
        // below functions is IMPORTANT.
-       // Reading sg must be after reading susi
-       // Cleanup compcsi must be after reading sg
        if (found_state_info == true) {
+               LOG_NO("Enter restore headless cached RTAs from IMM");
+               // Read all cached susi, includes ABSENT SUSI with IMM fsm state
                avd_susi_read_headless_cached_rta(cb);
+               // Read SUSwitch of SU, validate toggle depends on SUSI fsm 
state
+               avd_su_read_headless_cached_rta(cb);
+               // Read SUOperationList, set ABSENT fsm state for ABSENT SUSI
                avd_sg_read_headless_cached_rta(cb);
+               // Clean compcsi object of ABSENT SUSI
                avd_compcsi_cleanup_imm_object(cb);
-               avd_su_read_headless_cached_rta(cb);
+               // Last, validate all
+               bool valid = avd_sg_validate_headless_cached_rta(cb);
+               if (valid)
+                       LOG_NO("Leave reading headless cached RTAs from IMM: 
SUCCESS");
+               else
+                       LOG_ER("Leave reading headless cached RTAs from IMM: 
FAILED");
        }
 done:
        TRACE("queue_size after processing: %lu", (unsigned long) 
cb->evt_queue.size());
diff --git a/osaf/services/saf/amf/amfd/sg.cc b/osaf/services/saf/amf/amfd/sg.cc
--- a/osaf/services/saf/amf/amfd/sg.cc
+++ b/osaf/services/saf/amf/amfd/sg.cc
@@ -124,7 +124,8 @@ AVD_SG::AVD_SG():
                max_assigned_su(nullptr),
                min_assigned_su(nullptr),
                si_tobe_redistributed(nullptr),
-               try_inst_counter(0)
+               try_inst_counter(0),
+               headless_validation(true)
 {
        adminOp = static_cast<SaAmfAdminOperationIdT>(0);
        memset(&name, 0, sizeof(SaNameT));
@@ -2115,6 +2116,9 @@ void avd_sg_read_headless_cached_rta(AVD
                                        (SaImmAttrValuesT_2 ***)&attributes)) 
== SA_AIS_OK) {
                sg = sg_db->find(Amf::to_string(&sg_dn));
                if (sg && sg->sg_ncs_spec == false) {
+                       if (sg->headless_validation == false) {
+                               continue;
+                       }
                        // Read sg fsm state
                        rc = 
immutil_getAttr(const_cast<SaImmAttrNameT>("osafAmfSGFsmState"),
                                        attributes, 0, &imm_sg_fsm_state);
@@ -2159,6 +2163,37 @@ done:
     TRACE_LEAVE();
 }
 
+/**
+ * @brief  Validate all cached RTAs read from IMM after headless.
+           This validation is necessary. If AMFD doesn't have this
+           validation routine and the cached RTAs are invalid,
+           that would lead into *unpredictably* wrong states, which
+           is hard to debug (harder if no trace)
+ * @param  Control block (AVD_CL_CB).
+ * @Return true if valid, false otherwise.
+*/
+bool avd_sg_validate_headless_cached_rta(AVD_CL_CB *cb) {
+       TRACE_ENTER();
+       bool valid = true;
+       for (std::map<std::string, AVD_SG*>::const_iterator it = sg_db->begin();
+                       it != sg_db->end(); it++) {
+               AVD_SG *i_sg = it->second;
+               if (i_sg->sg_ncs_spec == true) {
+                       continue;
+               }
+
+               if (i_sg->headless_validation == false) {
+                       //TODO: AMFD should make all SUs of this SG faulty to 
remove
+                       //all assignments, clean up IMM headless cached RTA.
+                       //Just assert for now
+                       //osafassert(false);
+                       valid = false;
+               }
+       }
+       TRACE_LEAVE2("%u", valid);
+       return valid;
+}
+
 void AVD_SG::failover_absent_assignment() {
 
        TRACE_ENTER2("SG:'%s'", Amf::to_string(&name).c_str());
diff --git a/osaf/services/saf/amf/amfd/siass.cc 
b/osaf/services/saf/amf/amfd/siass.cc
--- a/osaf/services/saf/amf/amfd/siass.cc
+++ b/osaf/services/saf/amf/amfd/siass.cc
@@ -214,11 +214,17 @@ void avd_susi_read_headless_cached_rta(A
                susi = avd_su_susi_find(cb, su, &si->name);
                rc = immutil_getAttr("osafAmfSISUFsmState", attributes, 0, 
&imm_susi_fsm);
                osafassert(rc == SA_AIS_OK);
+               rc = immutil_getAttr("saAmfSISUHAState", attributes, 0, 
&imm_ha_state);
+               osafassert(rc == SA_AIS_OK);
 
                if (susi) { // FOR PRESENT SUSI found in AMFND(s)
                        TRACE("SISU:'%s', old(imm) fsm state: %d, new(sync) fsm 
state: %d",
                                Amf::to_string(&dn).c_str(), imm_susi_fsm, 
susi->fsm);
 
+                       if (avd_susi_validate_headless_cached_rta(susi, 
imm_ha_state,
+                                       imm_susi_fsm) == false) {
+                               continue;
+                       }
 #if 1
                        // If remove the below line in this #if block, AMFD 
will use
                        // the synced fsm state, which is latest. That means, in
@@ -255,8 +261,6 @@ void avd_susi_read_headless_cached_rta(A
 
                } else { // For ABSENT SUSI
                        if (su->sg_of_su->sg_ncs_spec == false) {
-                               rc = immutil_getAttr("saAmfSISUHAState", 
attributes, 0, &imm_ha_state);
-                               osafassert(rc == SA_AIS_OK);
                                TRACE("Absent SUSI, ha_state:'%u', 
fsm_state:'%u'", imm_ha_state, imm_susi_fsm);
                                if (imm_susi_fsm != AVD_SU_SI_STATE_UNASGN) {
                                        absent_susi = avd_susi_create(avd_cb, 
si, su, imm_ha_state, false, AVSV_SUSI_ACT_BASE);
@@ -288,6 +292,57 @@ void avd_susi_read_headless_cached_rta(A
 done:
     TRACE_LEAVE();
 }
+/**
+ * Validate cached RTA read from IMM
+ * @param present_susi
+ * @param ha_fr_imm: Ha state of @present_susi read from IMM
+ * @param fsm_fr_imm: Fsm state of @present susi read from IMM
+ * @return: true of valid, false otherwise
+ */
+bool avd_susi_validate_headless_cached_rta(AVD_SU_SI_REL *present_susi,
+               SaAmfHAStateT ha_fr_imm, AVD_SU_SI_STATE fsm_fr_imm) {
+       std::string dn = Amf::to_string(&present_susi->si->name) + "," +
+                       Amf::to_string(&present_susi->su->name);
+       TRACE_ENTER2("SISU:'%s'", dn.c_str());
+       bool valid = true;
+       // rule 1: valid ha state
+       if (ha_fr_imm != present_susi->state) {
+               if (ha_fr_imm == SA_AMF_HA_QUIESCING ||
+                       ha_fr_imm == SA_AMF_HA_QUIESCED) {
+                       // That's fine
+                       ;
+               } else {
+                       LOG_ER("SISU:'%s', old(imm) ha state: %d, new(sync) ha 
state: %d",
+                       dn.c_str(),     ha_fr_imm, present_susi->state);
+                       valid = false;
+                       goto done;
+               }
+       }
+       // rule 2: if ha_fr_imm is QUIESCING, one of relevant entities must
+       // have adminState is SHUTTINGDOWN
+       if (ha_fr_imm == SA_AMF_HA_QUIESCING) {
+               if (present_susi->su->saAmfSUAdminState == 
SA_AMF_ADMIN_SHUTTING_DOWN ||
+                       present_susi->si->saAmfSIAdminState == 
SA_AMF_ADMIN_SHUTTING_DOWN ||
+                       present_susi->su->sg_of_su->saAmfSGAdminState == 
SA_AMF_ADMIN_SHUTTING_DOWN ||
+                       present_susi->su->su_on_node->saAmfNodeAdminState == 
SA_AMF_ADMIN_SHUTTING_DOWN) {
+                       // That's fine
+                       ;
+               } else {
+                       LOG_ER("SISU:'%s', ha:'%u', but one of [node/sg/su/si] 
is not in SHUTTING_DOWN",
+                                       dn.c_str(), ha_fr_imm);
+                       valid = false;
+                       goto done;
+               }
+       }
+       // TODO: more rules to be added when issue is found in reality due to 
writing
+       // cached RTA to IMM
+done:
+       if (valid == false)
+               present_susi->su->sg_of_su->headless_validation = valid;
+
+       TRACE_LEAVE2("%u, %u", valid, 
present_susi->su->sg_of_su->headless_validation);
+       return present_susi->su->sg_of_su->headless_validation;
+}
 /*****************************************************************************
  * Function: avd_susi_create
  *
diff --git a/osaf/services/saf/amf/amfd/su.cc b/osaf/services/saf/amf/amfd/su.cc
--- a/osaf/services/saf/amf/amfd/su.cc
+++ b/osaf/services/saf/amf/amfd/su.cc
@@ -1964,6 +1964,18 @@ void avd_su_read_headless_cached_rta(AVD
                        rc = 
immutil_getAttr(const_cast<SaImmAttrNameT>("osafAmfSUSwitch"),
                                        attributes, 0, &su_toggle);
                        osafassert(rc == SA_AIS_OK);
+                       if (su_toggle == AVSV_SI_TOGGLE_SWITCH) {
+                               // 2N, if toggle but no pending assignment -> 
bad state
+                               if (su->sg_of_su->sg_redundancy_model == 
SA_AMF_2N_REDUNDANCY_MODEL &&
+                                       
su->sg_of_su->any_assignment_in_progress() == false){
+                                       LOG_ER("SG'%s', osafAmfSUSwitch:'%u', 
but no pending assignment",
+                                                       
Amf::to_string(&su->sg_of_su->name).c_str(),
+                                                       su_toggle);
+                                       su->sg_of_su->headless_validation = 
false;
+                               }
+                               if (su->sg_of_su->headless_validation == false)
+                                       continue;
+                       }
                        su->set_su_switch(su_toggle, false);
                }
        }

------------------------------------------------------------------------------
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to