If all nodes are synced after a headless period, the node sync timer
is stopped but node_sync_window_closed is never set to true.

Later on, if a node becomes split from the main network and
rejoins, it will send a headless sync to amfd.

amfd will then go into a never-ending loop of processing the message,
putting it back into the queue, and so on.

When the node sync timer is stopped, ensure node_sync_window_closed
is set.

Also modify avd_count_node_up() not to count standby SC.
Sometimes a node_up from the standby SC arrives before mds up,
and the standby SC is incorrectly included in the node sync
count. Then a legitimate node_up from a PL is not accepted
because node_sync_window_closed is prematurely set.
---
 src/amf/amfd/ndfsm.cc | 28 +++-------------------------
 1 file changed, 3 insertions(+), 25 deletions(-)

diff --git a/src/amf/amfd/ndfsm.cc b/src/amf/amfd/ndfsm.cc
index edc993988..375c5c7b1 100644
--- a/src/amf/amfd/ndfsm.cc
+++ b/src/amf/amfd/ndfsm.cc
@@ -165,34 +165,12 @@ done:
  *
  **************************************************************************/
 uint32_t avd_count_sync_node_size(AVD_CL_CB *cb) {
-  uint32_t twon_ncs_su_count = 0;
   uint32_t count = 0;
   TRACE_ENTER();
 
-  for (const auto &value : *node_name_db) {
-    AVD_AVND *avnd = value.second;
-    osafassert(avnd);
-    for (const auto &su : avnd->list_of_ncs_su) {
-      if (su->sg_of_su->sg_redundancy_model == SA_AMF_2N_REDUNDANCY_MODEL) {
-        twon_ncs_su_count++;
-        continue;
-      }
-    }
-  }
-  // cluster can have 1 SC or more SCs which hosting 2N Opensaf SU
-  // so twon_ncs_su_count at least is 1
-  osafassert(twon_ncs_su_count > 0);
-
-  if (twon_ncs_su_count == 1) {
-    // 1 SC, the rest of nodes could be in sync from headless
-    count = node_name_db->size() - 1;
-  } else {
-    // >=2 SCs, the rest of nodes could be in sync except active/standby SC
-    count = node_name_db->size() - 2;
-  }
+  count = node_name_db->size() - 1;
 
   TRACE("sync node size:%d", count);
-  TRACE_LEAVE();
   return count;
 }
 /*****************************************************************************
@@ -218,8 +196,7 @@ uint32_t avd_count_node_up(AVD_CL_CB *cb) {
   for (const auto &value : *node_name_db) {
     node = value.second;
     if (node->node_up_msg_count > 0 &&
-        node->node_info.nodeId != cb->node_id_avd &&
-        node->node_info.nodeId != cb->node_id_avd_other)
+        node->node_info.nodeId != cb->node_id_avd)
       ++received_count;
   }
   TRACE("Number of node director(s) that director received node_up msg:%u",
@@ -329,6 +306,7 @@ void avd_node_up_evh(AVD_CL_CB *cb, AVD_EVT *evt) {
       if (cb->node_sync_tmr.is_active) {
         avd_stop_tmr(cb, &cb->node_sync_tmr);
         TRACE("stop NodeSync timer");
+        cb->node_sync_window_closed = true;
       }
       cb->all_nodes_synced = true;
       LOG_NO("Received node_up_msg from all nodes");
-- 
2.17.1



_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to