src/amf/amfd/sg_nway_fsm.cc |  192 +++++++++++++++++++++++++++++++------------
 src/amf/amfd/su.cc          |   11 ++
 src/amf/amfd/su.h           |    1 +
 3 files changed, 151 insertions(+), 53 deletions(-)


During a lock/shutdown operation on an SU, Node or NG, AMFD sometimes does
not remove quiesced assignments. The ticket reports this problem both with
SI dependency (sidep) enabled and with it disabled.

The issue occurs in a specific situation: AMFD is unable to switch over all
the assignments of the locked SU, and it removes the locked SU from the
oper_list before all of its assignments have been removed. In addition, in the
REALIGN state, the case of sending removal of assignments to the quiesced SU
while the new active SU is in the oper_list is not handled.
Assignment distribution is like this :
SI1 and SI2 active in SU1 and SI3 standby in SU1.
SI1 and SI2 standby in SU3 and SI3 active in SU2.
During the SU1 lock, AMFD sends the quiesced HA state for SI1 and SI2 to SU1,
adds SU1 to the oper_list and marks the SG as SU_OPER. After SI1 becomes
quiesced, AMFD sends active for it to SU2. Before SI1 becomes active in SU2,
SI2 becomes quiesced. SU2 cannot take a further active assignment because of
a configuration limitation. AMFD sends removal of SI2 to both SU1 and SU2.
AMFD gets the removal response from SU1 for SI2, removes SU1 from the
oper_list and marks the SG as being in the REALIGN state. When SI1 becomes
active in SU2, AMFD does not remove the quiesced assignment from SU1 because
this case is not handled.

The patch fixes the problem by sending removal of the assignments when the SIs
become active in other SUs, for the case where SI dependency is not
configured.

Note: The patch does not cover the SI dependency issue.

diff --git a/src/amf/amfd/sg_nway_fsm.cc b/src/amf/amfd/sg_nway_fsm.cc
--- a/src/amf/amfd/sg_nway_fsm.cc
+++ b/src/amf/amfd/sg_nway_fsm.cc
@@ -45,7 +45,9 @@ static void avd_sg_nway_node_fail_sg_rea
 static AVD_SU_SI_REL * find_pref_standby_susi(AVD_SU_SI_REL *sisu);
 
 /* macro to determine if all the active sis assigned to a 
-   particular su have been successfully engaged by the standby */
+   particular su have been successfully engaged by the standby
+   TODO: Replace with avd_sg_nway_are_stdby_sus_engaged().
+ */
 #define m_AVD_SG_NWAY_ARE_STDBY_SUS_ENGAGED(su, susi, is) \
 { \
    AVD_SU_SI_REL *csusi = 0, *csisu = 0; \
@@ -74,6 +76,95 @@ static AVD_SU_SI_REL * find_pref_standby
    } \
 };
 
+/**
+ * @brief      Function counterpart of m_AVD_SG_NWAY_ARE_STDBY_SUS_ENGAGED:
+ *             reports whether every non-standby assignment of the SU has
+ *             settled, i.e. none of them still blocks removal.
+ *
+ * @param[in]  su    SU whose SUSI list is examined.
+ * @param[in]  susi  SUSI to leave out of the check (may be nullptr).
+ *
+ * @return     true if no examined SUSI blocks removal, false otherwise.
+ */
+bool avd_sg_nway_are_stdby_sus_engaged(AVD_SU *su, AVD_SU_SI_REL *susi) {
+  TRACE_ENTER();
+  bool ret = true;
+
+  for (AVD_SU_SI_REL *csusi = su->list_of_susi; csusi != nullptr;
+       csusi = csusi->su_next) {
+    // The excluded SUSI and standby assignments never block removal.
+    if ((csusi == susi) || (SA_AMF_HA_STANDBY == csusi->state))
+      continue;
+    /*
+       Case1: csusi is still active or quiescing in this SU, or its quiesced
+       modification has not completed yet: wait for its completion.
+     */
+    if ((SA_AMF_HA_ACTIVE == csusi->state) ||
+        (SA_AMF_HA_QUIESCING == csusi->state) ||
+        ((SA_AMF_HA_QUIESCED == csusi->state) &&
+         (AVD_SU_SI_STATE_MODIFY == csusi->fsm))) {
+      ret = false;
+      break;
+    }
+    // Case2: look for the first active assignment of this SI in any SU.
+    AVD_SU_SI_REL *csisu = csusi->si->list_of_sisu;
+    while ((csisu != nullptr) && (SA_AMF_HA_ACTIVE != csisu->state))
+      csisu = csisu->si_next;
+    /*
+       - A non-null csisu cannot be csusi itself: an active csusi has already
+         been handled by Case1.
+       - If csisu is nullptr, no SU holds the SI active; the assignment is
+         then treated as eligible for deletion because its
+         failover/switchover cannot be done.
+         Note: In future, for the SI dependency case, an assignment can be
+         quiesced and assigned here while failover of its sponsor is still
+         going on.
+       - If csisu is active and its fsm is ASGND, the SI is already engaged
+         in another SU.
+       - Otherwise the active assignment is still in progress, so this SU
+         is not fully engaged yet.
+     */
+    if ((csisu != nullptr) &&
+        (AVD_SU_SI_STATE_ASGND != csisu->fsm)) {
+      ret = false;
+      break;
+    }
+  }
+  TRACE_LEAVE2("ret:%u",ret);
+  return ret;
+}
+
+/**
+ * @brief  Checks that no SUSI of the SU is still active, quiescing or has a
+ *         quiesced modification pending (standby SUSIs are ignored).
+ * @return true/false
+ */
+static bool all_active_susis_are_quieced(const AVD_SU *su) {
+  for (const AVD_SU_SI_REL *rel = su->list_of_susi; rel != nullptr;
+       rel = rel->su_next) {
+    const bool still_serving = (rel->state == SA_AMF_HA_ACTIVE) ||
+                               (rel->state == SA_AMF_HA_QUIESCING);
+    // A quiesced SUSI only counts once its modification has completed.
+    const bool quiesced_pending = (rel->state == SA_AMF_HA_QUIESCED) &&
+                                  (rel->fsm == AVD_SU_SI_STATE_MODIFY);
+    if (still_serving || quiesced_pending)
+      return false;
+    // Standby (and any other state) moves on to the next SUSI.
+  }
+  return true;
+}
+
+// Finishes a pending shutdown: locks the SU, or else its shutting-down node.
+static void su_or_node_shutting_down_to_locked(AVD_SU *su) {
+  if ((su->saAmfSUAdminState == SA_AMF_ADMIN_SHUTTING_DOWN) &&
+      (all_active_susis_are_quieced(su) ||
+       su->all_susis_fsm_state(AVD_SU_SI_STATE_UNASGN)))
+    su->set_admin_state(SA_AMF_ADMIN_LOCKED);
+  else if ((su->su_on_node->saAmfNodeAdminState == SA_AMF_ADMIN_SHUTTING_DOWN) &&
+           (su->su_on_node->is_node_lock() == true))
+    node_admin_state_set(su->su_on_node, SA_AMF_ADMIN_LOCKED);
+}
+
 uint32_t SG_NWAY::si_assign(AVD_CL_CB *cb, AVD_SI *si) {
        uint32_t rc = NCSCC_RC_SUCCESS;
        
@@ -2209,6 +2300,7 @@ uint32_t avd_sg_nway_susi_succ_sg_realig
                } else {
                        /* check if su is present in the su-oper list */
                        m_AVD_CHK_OPLIST(su, is_su_present);
+                       TRACE("is_su_present:%u",is_su_present);
                        if (is_su_present) {
                                /* if other susis of this SU are assigned, 
remove it from the su-oper list */
                                for (curr_susi = su->list_of_susi;
@@ -2217,43 +2309,28 @@ uint32_t avd_sg_nway_susi_succ_sg_realig
 
                                if (!curr_susi)
                                        avd_sg_su_oper_list_del(cb, su, false);
+
                        } else {
                                if ((susi->state == SA_AMF_HA_ACTIVE) && 
(susi->si->num_dependents > 0)) {
                                        
avd_sidep_send_active_to_dependents(susi->si);
                                }
-
-                               /* identify the quiesced susi assignment */
-                               for (curr_susi = susi->si->list_of_sisu;
-                                    curr_susi && (SA_AMF_HA_QUIESCED != 
curr_susi->state);
-                                    curr_susi = curr_susi->si_next) ;
-
-                               if (curr_susi) {
-                                       /* the corresponding su should be in 
the su-oper list */
-                                       m_AVD_CHK_OPLIST(curr_susi->su, 
is_su_present);
-                                       if (is_su_present) {
-                                               /* determine if all the standby 
sus are engaged */
-                                               
m_AVD_SG_NWAY_ARE_STDBY_SUS_ENGAGED(curr_susi->su, 0, is_eng);
-                                               if (true == is_eng) {
-                                                       /* send remove all msg 
for all sis for this su */
-                                                       rc = 
avd_sg_su_si_del_snd(cb, curr_susi->su);
-                                                       if (NCSCC_RC_SUCCESS != 
rc) {
-                                                               LOG_ER("%s:%u: 
%s (%zu)", __FILE__, __LINE__, curr_susi->su->name.c_str(),
-                                                                               
                 curr_susi->su->name.length());
-                                                               goto done;
-                                                       }
-
-                                                       su_node_ptr = 
su->get_node_ptr();
-
-                                                       /* if su-shutdown 
semantics in progress, mark it locked */
-                                                       if 
(su->saAmfSUAdminState == SA_AMF_ADMIN_SHUTTING_DOWN) {
-                                                               
su->set_admin_state(SA_AMF_ADMIN_LOCKED);
-                                                       } else if 
(su_node_ptr->saAmfNodeAdminState ==
-                                                                  
SA_AMF_ADMIN_SHUTTING_DOWN) {
-                                                               if 
(su_node_ptr->is_node_lock() == true) {
-                                                                       
node_admin_state_set(su_node_ptr,
-                                                                               
             SA_AMF_ADMIN_LOCKED);
-                                                               }
-                                                       }
+                       }
+                       /* If this active assignment was part of switchover 
then identify 
+                          the quiesced susi assignment and send removal to it.
+                        */
+                       for (curr_susi = susi->si->list_of_sisu;
+                               curr_susi && (SA_AMF_HA_QUIESCED != 
curr_susi->state);
+                                       curr_susi = curr_susi->si_next) ;
+                       if (curr_susi) {
+                               m_AVD_CHK_OPLIST(curr_susi->su, is_su_present);
+                               if (is_su_present == true) {
+                                       is_eng = 
avd_sg_nway_are_stdby_sus_engaged(curr_susi->su, nullptr);
+                                       if ((is_eng == true) &&
+                                       (curr_susi->su->saAmfSuReadinessState 
== SA_AMF_READINESS_OUT_OF_SERVICE)) {
+                                               rc = avd_sg_su_si_del_snd(cb, 
curr_susi->su);
+                                               if (NCSCC_RC_SUCCESS != rc) {
+                                                       LOG_ER("Send for 
deletion failed for '%s'", curr_susi->su->name.c_str());
+                                                       goto done;
                                                }
                                        }
                                }
@@ -2581,17 +2658,6 @@ uint32_t SG_NWAY::susi_success_su_oper(A
                                        goto done;
                                }
 
-                               su_node_ptr = su->get_node_ptr();
-
-                               /* if su-shutdown semantics in progress, mark 
it locked */
-                               if (su->saAmfSUAdminState == 
SA_AMF_ADMIN_SHUTTING_DOWN) {
-                                       
su->set_admin_state(SA_AMF_ADMIN_LOCKED);
-                               } else if (su_node_ptr->saAmfNodeAdminState == 
SA_AMF_ADMIN_SHUTTING_DOWN) {
-                                       if (su_node_ptr->is_node_lock() == 
true) {
-                                               
node_admin_state_set(su_node_ptr, SA_AMF_ADMIN_LOCKED);
-                                       }
-                               }
-
                                /* transition to sg-realign state */
                                m_AVD_SET_SG_FSM(cb, sg, AVD_SG_FSM_SG_REALIGN);
                        }
@@ -2600,6 +2666,7 @@ uint32_t SG_NWAY::susi_success_su_oper(A
                         */
                        susi->si->delete_assignments(cb);
                }
+               su_or_node_shutting_down_to_locked(su);
        } else if (susi && (SA_AMF_HA_ACTIVE == state) && (AVSV_SUSI_ACT_DEL != 
act)) {
                /* => single active assignment success */
 
@@ -2674,15 +2741,34 @@ uint32_t SG_NWAY::susi_success_su_oper(A
                        avd_sg_nway_si_assign(cb, sg);
        } else if (susi && (AVSV_SUSI_ACT_DEL == act)) {
                /* Single susi delete success processing */
-
-               /* delete the su from the oper list */
-               avd_sg_su_oper_list_del(cb, su, false);
-
+               AVD_SU *su = susi->su;
+               if ((su->saAmfSUAdminState == SA_AMF_ADMIN_LOCKED) ||
+                               (su->su_on_node->saAmfNodeAdminState == 
SA_AMF_ADMIN_LOCKED)) {
+                       /* determine if all the standby sus are engaged */
+                       is_eng = avd_sg_nway_are_stdby_sus_engaged(su, nullptr);
+                       if (is_eng == true) {
+                               /*
+                                  Deletion for this SUSI could have been sent 
because this SI cannot
+                                  be switch-overed/fail-overeed while trying 
to failover all SUSIs of this SU.
+                                  If all SUSIs of this SU are now engaged to 
their respective actives then
+                                  send SU level removal.
+                                */
+                               rc = avd_sg_su_si_del_snd(cb, su);
+                               if (NCSCC_RC_SUCCESS != rc) {
+                                       LOG_ER("Send for deletion failed for 
'%s'", su->name.c_str());
+                                       goto done;
+                               }
+                               avd_sg_su_oper_list_add(cb, su, false);
+                               m_AVD_SET_SG_FSM(cb, sg, AVD_SG_FSM_SG_REALIGN);
+                       }
+               } else {
+                       avd_sg_su_oper_list_del(cb, su, false);
+               }
                /* free all the CSI assignments  */
-                avd_compcsi_delete(cb, susi, false);
-
-                /* free susi assignment */
-                m_AVD_SU_SI_TRG_DEL(cb, susi);
+               avd_compcsi_delete(cb, susi, false);
+
+               /* free susi assignment */
+               m_AVD_SU_SI_TRG_DEL(cb, susi);
 
 
                /* transition to sg-realign state or initiate si assignments */
diff --git a/src/amf/amfd/su.cc b/src/amf/amfd/su.cc
--- a/src/amf/amfd/su.cc
+++ b/src/amf/amfd/su.cc
@@ -2618,3 +2618,14 @@ void AVD_SU::set_surestart(bool value)
 bool AVD_SU::get_surestart() const
 { return surestart; }
 
+/**
+ * @brief  Tells whether every SUSI of this SU has the given fsm state.
+ * @param[in] fsm  fsm state each SUSI is compared against.
+ * @return true if no SUSI differs (also when the SU has no SUSI), else false.
+ */
+bool AVD_SU::all_susis_fsm_state(uint32_t fsm) const {
+  const AVD_SU_SI_REL *rel = list_of_susi;
+  while ((rel != nullptr) && (rel->fsm == fsm))
+    rel = rel->su_next;
+  return rel == nullptr;
+}
diff --git a/src/amf/amfd/su.h b/src/amf/amfd/su.h
--- a/src/amf/amfd/su.h
+++ b/src/amf/amfd/su.h
@@ -147,6 +147,7 @@ class AVD_SU {
        bool all_comps_in_presence_state(SaAmfPresenceStateT pres) const;
        void set_surestart(bool state);
        bool get_surestart() const;
+       bool all_susis_fsm_state(uint32_t fsm) const;
 
  private:
        void initialize();

------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most 
engaging tech sites, SlashDot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to