Hi Gary,

You can move the following change to the beginning of the function
avd_node_up_evh():

+       if (amfnd_svc_db->find(n2d_msg->msg_info.n2d_node_up.node_id) == 
amfnd_svc_db->end()) {
+               // don't process node_up until svc up is received
+               LOG_WA("amfnd svc up not yet received from node %x", 
n2d_msg->msg_info.n2d_node_up.node_id);
+               goto done;
+       }
+

Thanks
-Nagu

> -----Original Message-----
> From: Gary Lee [mailto:[email protected]]
> Sent: 24 October 2016 13:19
> To: [email protected]; Nagendra Kumar; Praveen Malviya;
> [email protected]; [email protected]
> Cc: [email protected]
> Subject: [PATCH 1 of 1] amfd: ignore node_up until the mds event amfnd up
> has been received [#2124]
> 
>  osaf/services/saf/amf/amfd/include/node.h |   1 +
>  osaf/services/saf/amf/amfd/main.cc        |   1 +
>  osaf/services/saf/amf/amfd/mds.cc         |  18 ++++++++++--------
>  osaf/services/saf/amf/amfd/ndfsm.cc       |  20 ++++++++++++++++++--
>  4 files changed, 30 insertions(+), 10 deletions(-)
> 
> 
> If the svc up event for amfnd arrives after N2D_NODE_UP, then amfd may
> fail to send D2N_NODE_UP to amfnd. This will eventually cause the
> respective amfnd to reboot the node, due to message ID mismatches.
> Instead, ignore node up until we have received the MDS event 'amfnd up'
> for that node.
> 
> diff --git a/osaf/services/saf/amf/amfd/include/node.h
> b/osaf/services/saf/amf/amfd/include/node.h
> --- a/osaf/services/saf/amf/amfd/include/node.h
> +++ b/osaf/services/saf/amf/amfd/include/node.h
> @@ -162,6 +162,7 @@ struct NodeNameCompare: public std::bina
>  extern AmfDb<std::string, AVD_AVND> *node_name_db;
>  extern AmfDb<uint32_t, AVD_AVND> *node_id_db;
>  extern AmfDb<uint32_t, AVD_FAIL_OVER_NODE> *node_list_db;
> +extern std::set<uint32_t> *amfnd_svc_db;
>  extern std::map<SaClmNodeIdT, MDS_SVC_PVT_SUB_PART_VER>
> nds_mds_ver_db;
>  class AVD_AMF_NG {
>  public:
> diff --git a/osaf/services/saf/amf/amfd/main.cc
> b/osaf/services/saf/amf/amfd/main.cc
> --- a/osaf/services/saf/amf/amfd/main.cc
> +++ b/osaf/services/saf/amf/amfd/main.cc
> @@ -573,6 +573,7 @@ static uint32_t initialize(void)
>              base::GetEnv("OSAF_AMF_MIN_CLUSTER_SIZE", uint32_t{2});
> 
>          node_list_db = new AmfDb<uint32_t, AVD_FAIL_OVER_NODE>;
> +     amfnd_svc_db = new std::set<uint32_t>;
>       /* get the node id of the node on which the AVD is running. */
>       cb->node_id_avd = m_NCS_GET_NODE_ID;
> 
> diff --git a/osaf/services/saf/amf/amfd/mds.cc
> b/osaf/services/saf/amf/amfd/mds.cc
> --- a/osaf/services/saf/amf/amfd/mds.cc
> +++ b/osaf/services/saf/amf/amfd/mds.cc
> @@ -420,16 +420,18 @@ static uint32_t avd_mds_svc_evt(MDS_CALL
> 
>               case NCSMDS_SVC_ID_AVND:
>                       {
> +                             AVD_EVT *evt = new AVD_EVT();
> +
> +                             evt->rcv_evt = AVD_EVT_MDS_AVND_UP;
> +                             evt->info.node_id =
> m_NCS_NODE_ID_FROM_MDS_DEST(evt_info->i_dest);
>                               if (evt_info->i_node_id == cb->node_id_avd)
> {
> -                                     AVD_EVT *evt = new AVD_EVT();
> +                                     cb->local_avnd_adest = evt_info-
> >i_dest;
> +                             }
> +                             if (m_NCS_IPC_SEND(&cb->avd_mbx, evt,
> NCS_IPC_PRIORITY_HIGH) != NCSCC_RC_SUCCESS) {
> +                                     LOG_ER("%s: ncs_ipc_send failed",
> __FUNCTION__);
> +                                     delete evt;
> +                             }
> 
> -                                     evt->rcv_evt =
> AVD_EVT_MDS_AVND_UP;
> -                                     cb->local_avnd_adest = evt_info-
> >i_dest;
> -                                     if (m_NCS_IPC_SEND(&cb-
> >avd_mbx, evt, NCS_IPC_PRIORITY_HIGH) != NCSCC_RC_SUCCESS) {
> -                                             LOG_ER("%s: ncs_ipc_send
> failed", __FUNCTION__);
> -                                             delete evt;
> -                                     }
> -                             }
>                               //Post MDS version info to mailbox.
>                               AVD_EVT *evt1 = new AVD_EVT();
>                               evt1->rcv_evt =
> AVD_EVT_ND_MDS_VER_INFO;
> diff --git a/osaf/services/saf/amf/amfd/ndfsm.cc
> b/osaf/services/saf/amf/amfd/ndfsm.cc
> --- a/osaf/services/saf/amf/amfd/ndfsm.cc
> +++ b/osaf/services/saf/amf/amfd/ndfsm.cc
> @@ -32,6 +32,10 @@
> 
>  AmfDb<uint32_t, AVD_FAIL_OVER_NODE> *node_list_db = 0;      /*
> SaClmNodeIdT index */
> 
> +// indicates whether MDS service up has been received from amfnd.
> +// If a node ID is in the set, then service up has been received.
> +std::set<uint32_t> *amfnd_svc_db = 0;
> +
> 
> /*************************************************************
> ****************
>   * Function: avd_process_state_info_queue
>   *
> @@ -359,6 +363,12 @@ void avd_node_up_evh(AVD_CL_CB *cb, AVD_
>               goto done;
>       }
> 
> +     if (amfnd_svc_db->find(n2d_msg->msg_info.n2d_node_up.node_id)
> == amfnd_svc_db->end()) {
> +             // don't process node_up until svc up is received
> +             LOG_WA("amfnd svc up not yet received from node %x",
> n2d_msg->msg_info.n2d_node_up.node_id);
> +             goto done;
> +     }
> +
>       if ((n2d_msg->msg_info.n2d_node_up.node_id == cb->node_id_avd)
> && (cb->init_state < AVD_INIT_DONE)) {
>               // node up from local AVND
>               avd_process_state_info_queue(cb);
> @@ -645,8 +655,13 @@ void avd_nd_ncs_su_failed(AVD_CL_CB *cb,
> 
>  void avd_mds_avnd_up_evh(AVD_CL_CB *cb, AVD_EVT *evt)
>  {
> -     TRACE("Local node director is up, start sending heart beats to %"
> PRIx64, cb->local_avnd_adest);
> -     avd_tmr_snd_hb_evh(cb, evt);
> +     if (evt->info.node_id == cb->node_id_avd) {
> +             TRACE("Local node director is up, start sending heart beats
> to %" PRIx64, cb->local_avnd_adest);
> +             avd_tmr_snd_hb_evh(cb, evt);
> +     }
> +
> +     TRACE("amfnd on %x is up", evt->info.node_id);
> +     amfnd_svc_db->insert(evt->info.node_id);
>  }
> 
> 
> /*************************************************************
> ****************
> @@ -672,6 +687,7 @@ void avd_mds_avnd_down_evh(AVD_CL_CB *cb
> 
>       //update MDS version db.
>       nds_mds_ver_db.erase(evt->info.node_id);
> +     amfnd_svc_db->erase(evt->info.node_id);
> 
>       if (node != nullptr) {
>               // Do nothing if the local node goes down. Most likely due to
> system shutdown.

------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most 
engaging tech sites, SlashDot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to