osaf/services/saf/amf/amfnd/clc.cc            |   5 +
 osaf/services/saf/amf/amfnd/di.cc             |   2 +-
 osaf/services/saf/amf/amfnd/include/avnd_su.h |   3 +-
 osaf/services/saf/amf/amfnd/su.cc             |  14 ++++
 osaf/services/saf/amf/amfnd/susm.cc           |  89 ++++++++++++++++++++++++--
 5 files changed, 102 insertions(+), 11 deletions(-)


An SU having one NPI and one PI comp moved to term-failed state during fresh
assignments, and the repair admin operation does not work. It was the NPI comp
that faulted.

When the SG is unlocked, AMFND initiates active assignments by instantiating
the NPI component and sending the CSI set callback to the PI comp. After
instantiation failure of the NPI comp, AMFND tries to clean up the component.
Cleanup fails.
AMFND marks the comp and the SU in TERM_FAILED state and also terminates the
PI comp. But AMFND neither responds to AMFD for the completion of the
assignment nor sends any recovery request.
Because of this, the SG remains unstable in REALIGN state. In this state, no
admin operation is allowed.

The patch solves the problem by sending a recovery request to AMFD, so that it
deletes the assignments and marks the SG stable.

diff --git a/osaf/services/saf/amf/amfnd/clc.cc 
b/osaf/services/saf/amf/amfnd/clc.cc
--- a/osaf/services/saf/amf/amfnd/clc.cc
+++ b/osaf/services/saf/amf/amfnd/clc.cc
@@ -1394,6 +1394,11 @@ uint32_t avnd_comp_clc_st_chng_prc(AVND_
                        ev = AVND_SU_PRES_FSM_EV_COMP_TERM_FAIL;
                else if ((SA_AMF_PRESENCE_TERMINATING == final_st) && 
(comp->su->pres == SA_AMF_PRESENCE_RESTARTING))
                        ev = AVND_SU_PRES_FSM_EV_COMP_TERMINATING;
+               else if ((final_st == SA_AMF_PRESENCE_UNINSTANTIATED) &&
+                               (comp->su->pres == 
SA_AMF_PRESENCE_TERMINATION_FAILED))
+                       /*This may be the last NPI comp being terminated in a 
TERM_FAILED SU.
+                         Trigger SU FSM so that AMFND can inform AMFD for 
su-failover.*/
+                       ev = AVND_SU_PRES_FSM_EV_COMP_UNINSTANTIATED;
                else if ((sufailover_in_progress(comp->su) || 
(m_AVND_SU_IS_RESTART(comp->su)) || 
                                        (avnd_cb->term_state == 
AVND_TERM_STATE_NODE_SWITCHOVER_STARTED) || 
                                        (all_comps_terminated_in_su(comp->su) 
== true)) &&
diff --git a/osaf/services/saf/amf/amfnd/di.cc 
b/osaf/services/saf/amf/amfnd/di.cc
--- a/osaf/services/saf/amf/amfnd/di.cc
+++ b/osaf/services/saf/amf/amfnd/di.cc
@@ -1643,7 +1643,7 @@ void avnd_sync_csicomp(AVND_CB *cb)
                add_comp_state_info(&msg, comp);
        }
 
-       LOG_NO("%d CSICOMP states synced", 
msg.info.avd->msg_info.n2d_nd_csicomp_state_info.num_csicomp);
+       LOG_NO("%d CSICOMP states sent", 
msg.info.avd->msg_info.n2d_nd_csicomp_state_info.num_csicomp);
        LOG_NO("%d COMP states sent", 
msg.info.avd->msg_info.n2d_nd_csicomp_state_info.num_comp);
 
        rc = avnd_di_msg_send(cb, &msg);
diff --git a/osaf/services/saf/amf/amfnd/include/avnd_su.h 
b/osaf/services/saf/amf/amfnd/include/avnd_su.h
--- a/osaf/services/saf/amf/amfnd/include/avnd_su.h
+++ b/osaf/services/saf/amf/amfnd/include/avnd_su.h
@@ -395,7 +395,7 @@ extern bool sufailover_in_progress(const
 extern bool sufailover_during_nodeswitchover(const AVND_SU *su);
 extern bool all_csis_in_removed_state(const AVND_SU *su);
 extern void su_reset_restart_count_in_comps(const struct avnd_cb_tag *cb, 
const AVND_SU *su);
-extern bool all_comps_terminated_in_su(const AVND_SU *su);
+extern bool all_comps_terminated_in_su(const AVND_SU *su, bool 
all_final_pres_states = false);
 
 void su_increment_su_restart_count(AVND_SU& su);
 void su_increment_comp_restart_count(AVND_SU& su);
@@ -416,4 +416,5 @@ extern AVND_SU *avnd_sudb_rec_get(AmfDb<
 extern AVND_SU *avnd_sudb_rec_get_next(AmfDb<std::string, AVND_SU>& sudb, 
const std::string& name);
 extern void sudb_rec_comp_add(AVND_SU *su, AVND_COMP *comp, uint32_t *rc);
 uint32_t avnd_evt_avd_compcsi_evh(struct avnd_cb_tag *cb, struct avnd_evt_tag 
*evt);
+bool avnd_su_check_sis_previous_assign_state(AVND_SU_SI_ASSIGN_STATE 
prev_assign_state, AVND_SU *su);
 #endif
diff --git a/osaf/services/saf/amf/amfnd/su.cc 
b/osaf/services/saf/amf/amfnd/su.cc
--- a/osaf/services/saf/amf/amfnd/su.cc
+++ b/osaf/services/saf/amf/amfnd/su.cc
@@ -877,6 +877,20 @@ bool isRestartSet(const AVND_SU *su)
 }
 
 /**
+ * @brief  True iff m_AVND_SU_SI_PRV_ASSIGN_STATE_IS_UNASSIGNED holds for every SI in su->si_list.
+ * @return true/false. NOTE(review): prev_assign_state is never read in the body - confirm intent.
+ */
+bool avnd_su_check_sis_previous_assign_state(AVND_SU_SI_ASSIGN_STATE 
prev_assign_state,
+       AVND_SU *su) {
+  for (AVND_SU_SI_REC *si = (AVND_SU_SI_REC 
*)m_NCS_DBLIST_FIND_FIRST(&su->si_list);
+    si; si = (AVND_SU_SI_REC *)m_NCS_DBLIST_FIND_NEXT(&si->su_dll_node)) {
+       if (!m_AVND_SU_SI_PRV_ASSIGN_STATE_IS_UNASSIGNED(si))
+               return false;
+  }                       
+  return true;
+}
+
+/**
  * @brief  Processes compcsi msg based on the action (msg_type). 
  *             As of now only try to send csi attribute change callback.
  * @param  comp (ptr to AVND_COMP)
diff --git a/osaf/services/saf/amf/amfnd/susm.cc 
b/osaf/services/saf/amf/amfnd/susm.cc
--- a/osaf/services/saf/amf/amfnd/susm.cc
+++ b/osaf/services/saf/amf/amfnd/susm.cc
@@ -59,6 +59,7 @@ static uint32_t avnd_su_pres_restart_com
 static uint32_t avnd_su_pres_restart_compterming_hdler(AVND_CB *, AVND_SU *, 
AVND_COMP *);
 static uint32_t avnd_su_pres_inst_compinstfail_hdler(AVND_CB *, AVND_SU *, 
AVND_COMP *);
 static uint32_t avnd_su_pres_instfailed_compuninst(AVND_CB *, AVND_SU *, 
AVND_COMP *);
+static uint32_t avnd_su_pres_termfailed_comptermfail_or_compuninst(AVND_CB *, 
AVND_SU *, AVND_COMP *);
 
 static uint32_t avnd_su_pres_st_chng_prc(AVND_CB *, AVND_SU *, 
SaAmfPresenceStateT, SaAmfPresenceStateT);
 
@@ -154,8 +155,8 @@ static AVND_SU_PRES_FSM_FN avnd_su_pres_
         0,                     /* COMP INSTANTIATED */
         0,                     /* COMP INST_FAIL */
         0,                     /* COMP RESTARTING */
-        0,                     /* COMP TERM_FAIL */
-        0,                     /* COMP UNINSTANTIATED */
+        avnd_su_pres_termfailed_comptermfail_or_compuninst,    /* COMP 
TERM_FAIL */
+        avnd_su_pres_termfailed_comptermfail_or_compuninst,    /* COMP 
UNINSTANTIATED */
         0,                     /* COMP TERMINATING */
         }
 };
@@ -1362,7 +1363,7 @@ done:
  * @param su
  * @return bool
  */
-bool all_comps_terminated_in_su(const AVND_SU *su)
+bool all_comps_terminated_in_su(const AVND_SU *su, bool all_final_pres_states)
 {
        AVND_COMP *comp;
 
@@ -1370,7 +1371,14 @@ bool all_comps_terminated_in_su(const AV
                        comp;
                        comp = 
m_AVND_COMP_FROM_SU_DLL_NODE_GET(m_NCS_DBLIST_FIND_NEXT(&comp->su_dll_node))) {
 
-               if (comp->pres != SA_AMF_PRESENCE_UNINSTANTIATED) {
+               if ((all_final_pres_states == false) && (comp->pres != 
SA_AMF_PRESENCE_UNINSTANTIATED)) {
+                       TRACE("'%s' not terminated, pres.st=%u", 
comp->name.c_str(), comp->pres);
+                       return false;
+               }
+               if ((all_final_pres_states == true) &&
+                       (comp->pres != SA_AMF_PRESENCE_UNINSTANTIATED) &&
+                       (comp->pres != SA_AMF_PRESENCE_INSTANTIATION_FAILED) &&
+                       (comp->pres != SA_AMF_PRESENCE_TERMINATION_FAILED)) { 
                        TRACE("'%s' not terminated, pres.st=%u", 
comp->name.c_str(), comp->pres);
                        return false;
                }
@@ -1699,10 +1707,10 @@ uint32_t avnd_su_pres_st_chng_prc(AVND_C
                                TRACE("SU oper state is disabled");
                }
 
-               /* terminating -> term-failed */
+               /* terminating/restarting -> term-failed */
                if (((prv_st == SA_AMF_PRESENCE_RESTARTING) || 
(SA_AMF_PRESENCE_TERMINATING == prv_st)) 
                                && (SA_AMF_PRESENCE_TERMINATION_FAILED == 
final_st)) {
-                       TRACE("Terminating -> Termination Failed");
+                       TRACE("Terminating/Restarting -> Termination Failed");
                        if (sufailover_in_progress(su)) {
                                /*Do not reset any flag, this will be done as a 
part of repair.*/
                                rc = avnd_di_oper_send(cb, su, 
AVSV_ERR_RCVR_SU_FAILOVER);
@@ -1711,13 +1719,54 @@ uint32_t avnd_su_pres_st_chng_prc(AVND_C
                                goto done;
                        }
                        m_AVND_SU_OPER_STATE_SET(su, 
SA_AMF_OPERATIONAL_DISABLED);
-                       /* inform AvD about oper state change */
-                       rc = avnd_di_oper_send(cb, su, 
SA_AMF_COMPONENT_FAILOVER);
+                       /* inform AvD about oper state change, in case prev 
state was TERMINATING.
+                          In RESTARTING case, comp FSM triggers 
comp-failover.*/
+                       if (prv_st == SA_AMF_PRESENCE_TERMINATING)
+                               rc = avnd_di_oper_send(cb, su, 
SA_AMF_COMPONENT_FAILOVER);
                        if (NCSCC_RC_SUCCESS != rc)
                                goto done;
 
                }
-
+               /*instantiated-> term-failed*/
+               if ((prv_st == SA_AMF_PRESENCE_INSTANTIATED) &&
+                               (final_st == 
SA_AMF_PRESENCE_TERMINATION_FAILED)) {
+                       TRACE("Instantiated -> Termination Failed");
+                       /*
+                          This state transition of SU can happen when: 
+                          -its one NPI comp moves to TERM_FAILED state. There 
can 
+                           be two subcases here: a)assigned NPI comp faults or 
b)it 
+                           faults during fresh assignments during 
instantiation phase. 
+                          -its restartable PI comp moves to TERM_FAILED state. 
There
+                           can be two subcases here:a)assigned PI comp fault 
or b)
+                           it faults during fresh assignments in CSI SET 
callback.
+                          In these cases SU moves directly from INSTANTIATED 
to TERM_FAILED state.
+
+                          AMFND should respond to AMFD for su-failover only 
when SU moves
+                          to TERM_FAILED state during fresh assignments.
+                        */
+                       if ((su->si_list.n_nodes != 0) && 
(m_AVND_SU_IS_ASSIGN_PEND(su)) && 
+                          
(avnd_su_check_sis_previous_assign_state(AVND_SU_SI_ASSIGN_STATE_UNASSIGNED, 
su) == true)) {
+                               m_AVND_SU_OPER_STATE_SET(su, 
SA_AMF_OPERATIONAL_DISABLED);
+                               if (all_comps_terminated_in_su(su, true) == 
true) {                     
+                                       TRACE_2("Informing AMFD of 
su-failover");
+                                       rc = avnd_di_oper_send(cb, su, 
AVSV_ERR_RCVR_SU_FAILOVER);
+                                       avnd_su_si_del(avnd_cb, su->name);
+                               } else {
+                                       //Some PI comps are still terminating. 
Try to terminate NPIs.
+                                       for (AVND_COMP *comp = 
m_AVND_COMP_FROM_SU_DLL_NODE_GET(m_NCS_DBLIST_FIND_FIRST(&su->comp_list));
+                                               comp;
+                                               comp = 
m_AVND_COMP_FROM_SU_DLL_NODE_GET(m_NCS_DBLIST_FIND_NEXT(&comp->su_dll_node))) {
+                                               if 
(m_AVND_COMP_TYPE_IS_PREINSTANTIABLE(comp))
+                                                       continue;
+                                               rc = 
avnd_comp_clc_fsm_trigger(cb, comp, AVND_COMP_CLC_PRES_FSM_EV_TERM);
+                                               if (NCSCC_RC_SUCCESS != rc) {
+                                                       LOG_ER("'%s' 
termination failed", comp->name.c_str());
+                                                       goto done;
+                                               }
+                                       }
+                               }
+                       } 
+               }
        }
 
        /* npi su */
@@ -3843,3 +3892,25 @@ done:
        TRACE_LEAVE();
        return rc;
 }
+
+static uint32_t avnd_su_pres_termfailed_comptermfail_or_compuninst(AVND_CB 
*cb, AVND_SU *su, AVND_COMP *comp) {
+  uint32_t rc = NCSCC_RC_SUCCESS;
+  const std::string compname = comp ? comp->name : "none";
+  TRACE_ENTER2("CompTermFailed/CompUnInstantiated event in the TermFailed 
state:'%s', '%s'",
+    su->name.c_str(), compname.c_str());
+
+  //PI SU case; for any other SU nothing is done and rc stays NCSCC_RC_SUCCESS.
+  if (m_AVND_SU_IS_PREINSTANTIABLE(su)) {
+    TRACE_1("PI SU");
+    if ((all_comps_terminated_in_su(su, true) == true) && 
+      (su->si_list.n_nodes != 0) &&
+      (m_AVND_SU_IS_ASSIGN_PEND(su)) &&
+      
(avnd_su_check_sis_previous_assign_state(AVND_SU_SI_ASSIGN_STATE_UNASSIGNED, 
su) == true)) {
+      TRACE_2("Informing AMFD of su-failover");
+      rc = avnd_di_oper_send(cb, su, AVSV_ERR_RCVR_SU_FAILOVER);
+      avnd_su_si_del(cb, su->name);
+    }
+  }
+  TRACE_LEAVE();
+  return rc;
+}

------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most 
engaging tech sites, SlashDot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to