osaf/services/saf/amf/amfd/include/node.h |   1 +
 osaf/services/saf/amf/amfd/main.cc        |   1 +
 osaf/services/saf/amf/amfd/mds.cc         |  18 ++++++++++--------
 osaf/services/saf/amf/amfd/ndfsm.cc       |  20 ++++++++++++++++++--
 4 files changed, 30 insertions(+), 10 deletions(-)


If the MDS service-up event for amfnd arrives after N2D_NODE_UP, amfd may
fail to send D2N_NODE_UP to amfnd. This will eventually cause the respective
amfnd to reboot the node due to message ID mismatches. Instead, ignore node-up
messages until the MDS event 'amfnd up' has been received for that node.

diff --git a/osaf/services/saf/amf/amfd/include/node.h b/osaf/services/saf/amf/amfd/include/node.h
--- a/osaf/services/saf/amf/amfd/include/node.h
+++ b/osaf/services/saf/amf/amfd/include/node.h
@@ -162,6 +162,7 @@ struct NodeNameCompare: public std::bina
 extern AmfDb<std::string, AVD_AVND> *node_name_db;
 extern AmfDb<uint32_t, AVD_AVND> *node_id_db;
 extern AmfDb<uint32_t, AVD_FAIL_OVER_NODE> *node_list_db;
+extern std::set<uint32_t> *amfnd_svc_db; 
 extern std::map<SaClmNodeIdT, MDS_SVC_PVT_SUB_PART_VER> nds_mds_ver_db;
 class AVD_AMF_NG {
 public:
diff --git a/osaf/services/saf/amf/amfd/main.cc b/osaf/services/saf/amf/amfd/main.cc
--- a/osaf/services/saf/amf/amfd/main.cc
+++ b/osaf/services/saf/amf/amfd/main.cc
@@ -573,6 +573,7 @@ static uint32_t initialize(void)
             base::GetEnv("OSAF_AMF_MIN_CLUSTER_SIZE", uint32_t{2});
 
         node_list_db = new AmfDb<uint32_t, AVD_FAIL_OVER_NODE>;
+       amfnd_svc_db = new std::set<uint32_t>;
        /* get the node id of the node on which the AVD is running. */
        cb->node_id_avd = m_NCS_GET_NODE_ID;
 
diff --git a/osaf/services/saf/amf/amfd/mds.cc b/osaf/services/saf/amf/amfd/mds.cc
--- a/osaf/services/saf/amf/amfd/mds.cc
+++ b/osaf/services/saf/amf/amfd/mds.cc
@@ -420,16 +420,18 @@ static uint32_t avd_mds_svc_evt(MDS_CALL
 
                case NCSMDS_SVC_ID_AVND:
                        {       
+                               AVD_EVT *evt = new AVD_EVT();
+
+                               evt->rcv_evt = AVD_EVT_MDS_AVND_UP;
+                               evt->info.node_id = 
m_NCS_NODE_ID_FROM_MDS_DEST(evt_info->i_dest);
                                if (evt_info->i_node_id == cb->node_id_avd) {
-                                       AVD_EVT *evt = new AVD_EVT();
+                                       cb->local_avnd_adest = evt_info->i_dest;
+                               }
+                               if (m_NCS_IPC_SEND(&cb->avd_mbx, evt, 
NCS_IPC_PRIORITY_HIGH) != NCSCC_RC_SUCCESS) {
+                                       LOG_ER("%s: ncs_ipc_send failed", 
__FUNCTION__);
+                                       delete evt;
+                               }
 
-                                       evt->rcv_evt = AVD_EVT_MDS_AVND_UP;
-                                       cb->local_avnd_adest = evt_info->i_dest;
-                                       if (m_NCS_IPC_SEND(&cb->avd_mbx, evt, 
NCS_IPC_PRIORITY_HIGH) != NCSCC_RC_SUCCESS) {
-                                               LOG_ER("%s: ncs_ipc_send 
failed", __FUNCTION__);
-                                               delete evt;
-                                       }
-                               }
                                //Post MDS version info to mailbox. 
                                AVD_EVT *evt1 = new AVD_EVT();
                                evt1->rcv_evt = AVD_EVT_ND_MDS_VER_INFO;
diff --git a/osaf/services/saf/amf/amfd/ndfsm.cc b/osaf/services/saf/amf/amfd/ndfsm.cc
--- a/osaf/services/saf/amf/amfd/ndfsm.cc
+++ b/osaf/services/saf/amf/amfd/ndfsm.cc
@@ -32,6 +32,10 @@
 
 AmfDb<uint32_t, AVD_FAIL_OVER_NODE> *node_list_db = 0;      /* SaClmNodeIdT 
index */
 
+// indicates whether MDS service up has been received from amfnd.
+// If a node ID is in the set, then service up has been received.
+std::set<uint32_t> *amfnd_svc_db = 0;
+
 /*****************************************************************************
  * Function: avd_process_state_info_queue
  *
@@ -359,6 +363,12 @@ void avd_node_up_evh(AVD_CL_CB *cb, AVD_
                goto done;
        }
 
+       if (amfnd_svc_db->find(n2d_msg->msg_info.n2d_node_up.node_id) == 
amfnd_svc_db->end()) {
+               // don't process node_up until svc up is received
+               LOG_WA("amfnd svc up not yet received from node %x", 
n2d_msg->msg_info.n2d_node_up.node_id);
+               goto done;
+       }
+
        if ((n2d_msg->msg_info.n2d_node_up.node_id == cb->node_id_avd) && 
(cb->init_state < AVD_INIT_DONE)) {
                // node up from local AVND
                avd_process_state_info_queue(cb);
@@ -645,8 +655,13 @@ void avd_nd_ncs_su_failed(AVD_CL_CB *cb,
 
 void avd_mds_avnd_up_evh(AVD_CL_CB *cb, AVD_EVT *evt)
 {
-       TRACE("Local node director is up, start sending heart beats to %" 
PRIx64, cb->local_avnd_adest);
-       avd_tmr_snd_hb_evh(cb, evt);
+       if (evt->info.node_id == cb->node_id_avd) {
+               TRACE("Local node director is up, start sending heart beats to 
%" PRIx64, cb->local_avnd_adest);
+               avd_tmr_snd_hb_evh(cb, evt);
+       }
+
+       TRACE("amfnd on %x is up", evt->info.node_id);
+       amfnd_svc_db->insert(evt->info.node_id);
 }
 
 /*****************************************************************************
@@ -672,6 +687,7 @@ void avd_mds_avnd_down_evh(AVD_CL_CB *cb
        
        //update MDS version db.
        nds_mds_ver_db.erase(evt->info.node_id);
+       amfnd_svc_db->erase(evt->info.node_id);
 
        if (node != nullptr) {
                // Do nothing if the local node goes down. Most likely due to 
system shutdown.

------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most 
engaging tech sites, SlashDot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to