Hi Gary,
You can move the following changes to the beginning of the function
avd_node_up_evh():
+ if (amfnd_svc_db->find(n2d_msg->msg_info.n2d_node_up.node_id) ==
amfnd_svc_db->end()) {
+ // don't process node_up until svc up is received
+ LOG_WA("amfnd svc up not yet received from node %x",
n2d_msg->msg_info.n2d_node_up.node_id);
+ goto done;
+ }
+
Thanks
-Nagu
> -----Original Message-----
> From: Gary Lee [mailto:[email protected]]
> Sent: 24 October 2016 13:19
> To: [email protected]; Nagendra Kumar; Praveen Malviya;
> [email protected]; [email protected]
> Cc: [email protected]
> Subject: [PATCH 1 of 1] amfd: ignore node_up until the mds event amfnd up
> has been received [#2124]
>
> osaf/services/saf/amf/amfd/include/node.h | 1 +
> osaf/services/saf/amf/amfd/main.cc | 1 +
> osaf/services/saf/amf/amfd/mds.cc | 18 ++++++++++--------
> osaf/services/saf/amf/amfd/ndfsm.cc | 20 ++++++++++++++++++--
> 4 files changed, 30 insertions(+), 10 deletions(-)
>
>
> If the svc up event for amfnd arrives after N2D_NODE_UP, then amfd may
> fail to send D2N_NODE_UP to amfnd. This will eventually cause the
> respective amfnd to reboot the node due to message ID mismatches.
> Instead, ignore node up until we have received the mds event 'amfnd up'
> for that node.
>
> diff --git a/osaf/services/saf/amf/amfd/include/node.h
> b/osaf/services/saf/amf/amfd/include/node.h
> --- a/osaf/services/saf/amf/amfd/include/node.h
> +++ b/osaf/services/saf/amf/amfd/include/node.h
> @@ -162,6 +162,7 @@ struct NodeNameCompare: public std::bina
> extern AmfDb<std::string, AVD_AVND> *node_name_db;
> extern AmfDb<uint32_t, AVD_AVND> *node_id_db;
> extern AmfDb<uint32_t, AVD_FAIL_OVER_NODE> *node_list_db;
> +extern std::set<uint32_t> *amfnd_svc_db;
> extern std::map<SaClmNodeIdT, MDS_SVC_PVT_SUB_PART_VER>
> nds_mds_ver_db;
> class AVD_AMF_NG {
> public:
> diff --git a/osaf/services/saf/amf/amfd/main.cc
> b/osaf/services/saf/amf/amfd/main.cc
> --- a/osaf/services/saf/amf/amfd/main.cc
> +++ b/osaf/services/saf/amf/amfd/main.cc
> @@ -573,6 +573,7 @@ static uint32_t initialize(void)
> base::GetEnv("OSAF_AMF_MIN_CLUSTER_SIZE", uint32_t{2});
>
> node_list_db = new AmfDb<uint32_t, AVD_FAIL_OVER_NODE>;
> + amfnd_svc_db = new std::set<uint32_t>;
> /* get the node id of the node on which the AVD is running. */
> cb->node_id_avd = m_NCS_GET_NODE_ID;
>
> diff --git a/osaf/services/saf/amf/amfd/mds.cc
> b/osaf/services/saf/amf/amfd/mds.cc
> --- a/osaf/services/saf/amf/amfd/mds.cc
> +++ b/osaf/services/saf/amf/amfd/mds.cc
> @@ -420,16 +420,18 @@ static uint32_t avd_mds_svc_evt(MDS_CALL
>
> case NCSMDS_SVC_ID_AVND:
> {
> + AVD_EVT *evt = new AVD_EVT();
> +
> + evt->rcv_evt = AVD_EVT_MDS_AVND_UP;
> + evt->info.node_id =
> m_NCS_NODE_ID_FROM_MDS_DEST(evt_info->i_dest);
> if (evt_info->i_node_id == cb->node_id_avd)
> {
> - AVD_EVT *evt = new AVD_EVT();
> + cb->local_avnd_adest = evt_info-
> >i_dest;
> + }
> + if (m_NCS_IPC_SEND(&cb->avd_mbx, evt,
> NCS_IPC_PRIORITY_HIGH) != NCSCC_RC_SUCCESS) {
> + LOG_ER("%s: ncs_ipc_send failed",
> __FUNCTION__);
> + delete evt;
> + }
>
> - evt->rcv_evt =
> AVD_EVT_MDS_AVND_UP;
> - cb->local_avnd_adest = evt_info-
> >i_dest;
> - if (m_NCS_IPC_SEND(&cb-
> >avd_mbx, evt, NCS_IPC_PRIORITY_HIGH) != NCSCC_RC_SUCCESS) {
> - LOG_ER("%s: ncs_ipc_send
> failed", __FUNCTION__);
> - delete evt;
> - }
> - }
> //Post MDS version info to mailbox.
> AVD_EVT *evt1 = new AVD_EVT();
> evt1->rcv_evt =
> AVD_EVT_ND_MDS_VER_INFO;
> diff --git a/osaf/services/saf/amf/amfd/ndfsm.cc
> b/osaf/services/saf/amf/amfd/ndfsm.cc
> --- a/osaf/services/saf/amf/amfd/ndfsm.cc
> +++ b/osaf/services/saf/amf/amfd/ndfsm.cc
> @@ -32,6 +32,10 @@
>
> AmfDb<uint32_t, AVD_FAIL_OVER_NODE> *node_list_db = 0; /*
> SaClmNodeIdT index */
>
> +// indicates whether MDS service up has been received from amfnd.
> +// If a node ID is in the set, then service up has been received.
> +std::set<uint32_t> *amfnd_svc_db = 0;
> +
>
> /*************************************************************
> ****************
> * Function: avd_process_state_info_queue
> *
> @@ -359,6 +363,12 @@ void avd_node_up_evh(AVD_CL_CB *cb, AVD_
> goto done;
> }
>
> + if (amfnd_svc_db->find(n2d_msg->msg_info.n2d_node_up.node_id)
> == amfnd_svc_db->end()) {
> + // don't process node_up until svc up is received
> + LOG_WA("amfnd svc up not yet received from node %x",
> n2d_msg->msg_info.n2d_node_up.node_id);
> + goto done;
> + }
> +
> if ((n2d_msg->msg_info.n2d_node_up.node_id == cb->node_id_avd)
> && (cb->init_state < AVD_INIT_DONE)) {
> // node up from local AVND
> avd_process_state_info_queue(cb);
> @@ -645,8 +655,13 @@ void avd_nd_ncs_su_failed(AVD_CL_CB *cb,
>
> void avd_mds_avnd_up_evh(AVD_CL_CB *cb, AVD_EVT *evt)
> {
> - TRACE("Local node director is up, start sending heart beats to %"
> PRIx64, cb->local_avnd_adest);
> - avd_tmr_snd_hb_evh(cb, evt);
> + if (evt->info.node_id == cb->node_id_avd) {
> + TRACE("Local node director is up, start sending heart beats
> to %" PRIx64, cb->local_avnd_adest);
> + avd_tmr_snd_hb_evh(cb, evt);
> + }
> +
> + TRACE("amfnd on %x is up", evt->info.node_id);
> + amfnd_svc_db->insert(evt->info.node_id);
> }
>
>
> /*************************************************************
> ****************
> @@ -672,6 +687,7 @@ void avd_mds_avnd_down_evh(AVD_CL_CB *cb
>
> //update MDS version db.
> nds_mds_ver_db.erase(evt->info.node_id);
> + amfnd_svc_db->erase(evt->info.node_id);
>
> if (node != nullptr) {
> // Do nothing if the local node goes down. Most likely due to
> system shutdown.
------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, SlashDot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel