But please push it after the GA release.

Thanks
-Nagu

> -----Original Message-----
> From: Nagendra Kumar
> Sent: 05 April 2017 14:38
> To: Minh Hon Chau; [email protected]; Praveen Malviya;
> [email protected]
> Cc: [email protected]
> Subject: RE: [PATCH 2 of 3] AMF: Handle node_down message [#2376] V2
> 
> Ack for all the patches, tested.
> 
> Thanks
> -Nagu
> 
> > -----Original Message-----
> > From: Minh Hon Chau [mailto:[email protected]]
> > Sent: 31 March 2017 06:04
> > To: [email protected]; Nagendra Kumar; Praveen Malviya;
> > [email protected]; [email protected]
> > Cc: [email protected]
> > Subject: [PATCH 2 of 3] AMF: Handle node_down message [#2376] V2
> >
> >  src/amf/amfd/main.cc      |   2 +-
> >  src/amf/amfd/ndfsm.cc     |  61
> > +++++++++++++++++++++++++++++++++++++++++++
> >  src/amf/amfd/proc.h       |   1 +
> >  src/amf/amfnd/avnd_defs.h |   2 +
> >  src/amf/amfnd/avnd_di.h   |   1 +
> >  src/amf/amfnd/avnd_mds.h  |   1 +
> >  src/amf/amfnd/di.cc       |  66
> > +++++++++++++++++++++++++++++++++++++++++++++-
> >  src/amf/amfnd/mds.cc      |   2 +-
> >  src/amf/amfnd/susm.cc     |  12 +++++++-
> >  src/amf/amfnd/term.cc     |   9 ++++-
> >  10 files changed, 149 insertions(+), 8 deletions(-)
> >
> >
> > This patch implements how amfnd and amfd handle the node_down message.
> > Before amfnd enters component termination, it sends a node_down
> > message to amfd and starts a timer.
> > In amfd, upon reception of the node_down message, amfd tries to execute
> > all of its pending jobs. A node ack is sent if amfd finishes all of its
> > jobs.
> > If the timer expires or amfnd receives the node ack message for
> > NODE_DOWN_MSG, amfnd continues its component termination phase.
> >
> > diff --git a/src/amf/amfd/main.cc b/src/amf/amfd/main.cc
> > --- a/src/amf/amfd/main.cc
> > +++ b/src/amf/amfd/main.cc
> > @@ -100,7 +100,7 @@ static const AVD_EVT_HDLR g_actv_list[AV
> >     avd_pg_trk_act_evh,      /* AVD_EVT_PG_TRACK_ACT_MSG */
> >     avd_oper_req_evh,        /* AVD_EVT_OPERATION_REQUEST_MSG */
> >     avd_data_update_req_evh, /* AVD_EVT_DATA_REQUEST_MSG */
> > -   invalid_evh,         /* AVD_EVT_NODE_DOWN_MSG */
> > +   avd_node_down_evh,         /* AVD_EVT_NODE_DOWN_MSG */
> >     avd_ack_nack_evh,            /* AVD_EVT_VERIFY_ACK_NACK_MSG */
> >     avd_comp_validation_evh, /* AVD_EVT_COMP_VALIDATION_MSG
> */
> >     avd_nd_sisu_state_info_evh,       /*
> > AVD_EVT_ND_SISU_STATE_INFO_MSG */
> > diff --git a/src/amf/amfd/ndfsm.cc b/src/amf/amfd/ndfsm.cc
> > --- a/src/amf/amfd/ndfsm.cc
> > +++ b/src/amf/amfd/ndfsm.cc
> > @@ -531,6 +531,67 @@ done:
> >  }
> >
> >
> >
> /*************************************************************
> > ****************
> > + * Function: avd_node_down_evh
> > + *
> > + * Purpose:  This function is the handler for node down event
> > + indicating
> > + * the arrival of the node_down message. AMFND sends this message
> > + when
> > + * AMFND is going to terminate OpenSAF SU(s), who are providing
> > + services
> > + * that AMFD may need. When AMFD receives this message, AMFD
> > + currently
> > + * will execute all pending IMM update jobs to avoid a loss of IMM
> > + data
> > + *
> > + * Input: cb - the AVD control block
> > + *        evt - The event information.
> > + *
> > + * Returns: None.
> > + *
> > + * NOTES:
> > + *
> > + *
> > +
> >
> **************************************************************
> > ************/
> > +void avd_node_down_evh(AVD_CL_CB *cb, AVD_EVT *evt) {
> > +   AVD_DND_MSG *n2d_msg = evt->info.avnd_msg;
> > +   AVD_AVND *node = nullptr;
> > +
> > +   TRACE_ENTER2("from nodeId=0x%x", n2d_msg-
> > >msg_info.n2d_node_down_info.node_id);
> > +
> > +   if (evt->info.avnd_msg->msg_type !=
> > AVSV_N2D_NODE_DOWN_MSG) {
> > +           LOG_WA("%s: wrong message type (%u)",
> > __FUNCTION__,evt->info.avnd_msg->msg_type);
> > +           goto done;
> > +   }
> > +
> > +   if ((node = avd_node_find_nodeid(n2d_msg-
> > >msg_info.n2d_node_down_info.node_id)) == nullptr) {
> > +           LOG_WA("%s: invalid node ID (%x)", __FUNCTION__,
> > n2d_msg->msg_info.n2d_node_down_info.node_id);
> > +           goto done;
> > +   }
> > +
> > +   if ((node->rcv_msg_id + 1) == n2d_msg-
> > >msg_info.n2d_node_down_info.msg_id)
> > +           m_AVD_SET_AVND_RCV_ID(cb, node, (n2d_msg-
> > >msg_info.n2d_node_down_info.msg_id));
> > +
> > +   // try to execute all pending jobs
> > +   AvdJobDequeueResultT ret = JOB_EXECUTED;
> > +   while (Fifo::size() > 0) {
> > +           ret = Fifo::execute(cb);
> > +           if (ret != JOB_EXECUTED) {
> > +                   LOG_WA("AMFD has (%d) pending jobs not being
> > executed", Fifo::size());
> > +                   break;
> > +           }
> > +   }
> > +   if (ret == JOB_EXECUTED) {
> > +           // send ack for node_down message to amfnd, so amfnd can
> > continue termination phase
> > +           if (avd_snd_node_ack_msg(cb, node, n2d_msg-
> > >msg_info.n2d_node_down_info.msg_id) != NCSCC_RC_SUCCESS) {
> > +                   /* log error that the director is not able to send the
> > message */
> > +                   LOG_ER("%s:%u: %u", __FILE__, __LINE__, node-
> > >node_info.nodeId);
> > +           }
> > +   }
> > +
> > +done:
> > +   avsv_dnd_msg_free(n2d_msg);
> > +   evt->info.avnd_msg = nullptr;
> > +   TRACE_LEAVE();
> > +}
> > +
> >
> +/************************************************************
> > *****************
> >   * Function: avd_nd_ncs_su_assigned
> >   *
> >   * Purpose:  This function is the handler for node director event
> > when a diff --git a/src/amf/amfd/proc.h b/src/amf/amfd/proc.h
> > --- a/src/amf/amfd/proc.h
> > +++ b/src/amf/amfd/proc.h
> > @@ -64,6 +64,7 @@ uint32_t avd_evt_queue_count(AVD_CL_CB *
> uint32_t
> > avd_count_sync_node_size(AVD_CL_CB *cb);  void
> > avd_process_state_info_queue(AVD_CL_CB *cb);  void
> > avd_node_up_evh(AVD_CL_CB *cb, struct AVD_EVT *evt);
> > +void avd_node_down_evh(AVD_CL_CB *cb, struct AVD_EVT *evt);
> >  void avd_reg_su_evh(AVD_CL_CB *cb, struct AVD_EVT *evt);  void
> > avd_oper_req_evh(AVD_CL_CB *cb, struct AVD_EVT *evt);  void
> > avd_mds_avnd_up_evh(AVD_CL_CB *cb, struct AVD_EVT *evt); diff --git
> > a/src/amf/amfnd/avnd_defs.h b/src/amf/amfnd/avnd_defs.h
> > --- a/src/amf/amfnd/avnd_defs.h
> > +++ b/src/amf/amfnd/avnd_defs.h
> > @@ -62,6 +62,8 @@
> >  #define AVND_COMP_CBK_RESP_TIME       5000 /* time out callback
> > response */
> >  #define AVND_AVD_MSG_RESP_TIME   1000      /* time out AvD message
> > response */
> >
> > +#define AVND_NODE_DOWN_MAX_RETRY   10 /* max retries on waiting
> > for ack of node_down msg */
> > +
> >  #define m_AVND_STACKSIZE       NCS_STACKSIZE_HUGE
> >
> >  typedef enum {
> > diff --git a/src/amf/amfnd/avnd_di.h b/src/amf/amfnd/avnd_di.h
> > --- a/src/amf/amfnd/avnd_di.h
> > +++ b/src/amf/amfnd/avnd_di.h
> > @@ -82,6 +82,7 @@ void avnd_diq_rec_del(struct avnd_cb_tag  void
> > avnd_diq_rec_send_buffered_msg(struct avnd_cb_tag *cb);  uint32_t
> > avnd_diq_rec_send(struct avnd_cb_tag *cb, AVND_DND_MSG_LIST *rec);
> > uint32_t avnd_di_reg_su_rsp_snd(struct avnd_cb_tag *cb, const
> > std::string& su_name, uint32_t ret_code);
> > +uint32_t avnd_di_node_down_msg_send(struct avnd_cb_tag *cb);
> >  uint32_t avnd_di_ack_nack_msg_send(struct avnd_cb_tag *cb, uint32_t
> > rcv_id, uint32_t view_num);  extern void avnd_di_uns32_upd_send(int
> > class_id, int attr_id, const std::string& dn, uint32_t value);  extern
> > uint32_t avnd_di_resend_pg_start_track(struct avnd_cb_tag *); diff
> > --git a/src/amf/amfnd/avnd_mds.h b/src/amf/amfnd/avnd_mds.h
> > --- a/src/amf/amfnd/avnd_mds.h
> > +++ b/src/amf/amfnd/avnd_mds.h
> > @@ -66,6 +66,7 @@ typedef struct avnd_dnd_msg_list_tag {
> >     AVND_MSG msg;
> >     AVND_TMR resp_tmr;
> >     uint32_t opq_hdl;
> > +   uint16_t no_retries;
> >     struct avnd_dnd_msg_list_tag *next;
> >  } AVND_DND_MSG_LIST;
> >
> > diff --git a/src/amf/amfnd/di.cc b/src/amf/amfnd/di.cc
> > --- a/src/amf/amfnd/di.cc
> > +++ b/src/amf/amfnd/di.cc
> > @@ -430,17 +430,35 @@ uint32_t avnd_evt_tmr_rcv_msg_rsp_evh(AV
> >     AVND_TMR_EVT *tmr = &evt->info.tmr;
> >     AVND_DND_MSG_LIST *rec = 0;
> >     uint32_t rc = NCSCC_RC_SUCCESS;
> > -
> > +   bool rec_tobe_deleted = false;
> >     TRACE_ENTER();
> >
> >     /* retrieve the message record */
> >     if ((0 == (rec = (AVND_DND_MSG_LIST
> > *)ncshm_take_hdl(NCS_SERVICE_ID_AVND, tmr->opq_hdl))))
> >             goto done;
> >
> > -   rc = avnd_diq_rec_send(cb, rec);
> > +   /* Resend on time out if it's NODE_UP msg only */
> > +   if (rec->msg.info.avd->msg_type == AVSV_N2D_NODE_UP_MSG) {
> > +           rc = avnd_diq_rec_send(cb, rec);
> > +   } else if (rec->msg.info.avd->msg_type ==
> > AVSV_N2D_NODE_DOWN_MSG) {
> > +           if (rec->no_retries < AVND_NODE_DOWN_MAX_RETRY) {
> > +                   rc = avnd_diq_rec_send(cb, rec);
> > +           } else {
> > +                   LOG_WA("Node Down timer retries is over");
> > +                   avnd_last_step_clean(cb);
> > +                   rec_tobe_deleted = true;
> > +           }
> > +   } else {
> > +           LOG_WA("Unexpected message response timeout with
> > msg_type(%u)", rec->msg.info.avd->msg_type);
> > +           rec_tobe_deleted = true;
> > +   }
> >
> >     ncshm_give_hdl(tmr->opq_hdl);
> >
> > +   if (rec_tobe_deleted) {
> > +           m_AVND_DIQ_REC_FIND_POP(cb, rec);
> > +           avnd_diq_rec_del(cb, rec);
> > +   }
> >  done:
> >     TRACE_LEAVE();
> >     return rc;
> > @@ -1159,6 +1177,39 @@ uint32_t avnd_di_reg_su_rsp_snd(AVND_CB  }
> >
> >
> >
> /*************************************************************
> > ***************
> > +  Name          : avnd_di_node_down_msg_send
> > +
> > +  Description   : This routine sends node_down message to active amf
> > director.
> > +
> > +  Arguments     : cb  - ptr to the AvND control block
> > +
> > +  Return Values : NCSCC_RC_SUCCESS/NCSCC_RC_FAILURE
> > +
> > +  Notes         : None.
> >
> +*************************************************************
> > *****************/
> > +uint32_t avnd_di_node_down_msg_send(AVND_CB *cb) {
> > +   AVND_MSG msg;
> > +   uint32_t rc = NCSCC_RC_SUCCESS;
> > +   TRACE_ENTER();
> > +   memset(&msg, 0, sizeof(AVND_MSG));
> > +   msg.info.avd = static_cast<AVSV_DND_MSG*>(calloc(1,
> > sizeof(AVSV_DND_MSG)));
> > +   msg.type = AVND_MSG_AVD;
> > +   msg.info.avd->msg_type = AVSV_N2D_NODE_DOWN_MSG;
> > +   msg.info.avd->msg_info.n2d_node_down_info.msg_id = ++(cb-
> > >snd_msg_id);
> > +   msg.info.avd->msg_info.n2d_node_down_info.node_id = cb-
> > >node_info.nodeId;
> > +   rc = avnd_di_msg_send(cb, &msg);
> > +   if (rc == NCSCC_RC_SUCCESS) {
> > +           msg.info.avd = 0;
> > +   }
> > +
> > +   // free the contents of avnd message
> > +   avnd_msg_content_free(cb, &msg);
> > +   TRACE_LEAVE();
> > +   return NCSCC_RC_SUCCESS;
> > +}
> > +
> >
> +/************************************************************
> > ****************
> >    Name          : avnd_di_msg_ack_process
> >
> >    Description   : This routine processes the the acks that are generated by
> > @@ -1179,6 +1230,12 @@ void avnd_di_msg_ack_process(AVND_CB *cb
> >     /* find & pop the matching record */
> >     m_AVND_DIQ_REC_FIND(cb, mid, rec);
> >     if (rec) {
> > +           if (rec->msg.info.avd->msg_type ==
> > AVSV_N2D_NODE_DOWN_MSG) {
> > +                   // first to stop timer to avoid processing timeout
> > event
> > +                   // then perform last step clean up
> > +                   avnd_stop_tmr(cb, &rec->resp_tmr);
> > +                   avnd_last_step_clean(cb);
> > +           }
> >             m_AVND_DIQ_REC_FIND_POP(cb, rec);
> >             avnd_diq_rec_del(cb, rec);
> >     }
> > @@ -1240,6 +1297,7 @@ AVND_DND_MSG_LIST
> *avnd_diq_rec_add(AVND
> >     /* store the msg (transfer memory ownership) */
> >     rec->msg.type = msg->type;
> >     rec->msg.info.avd = msg->info.avd;
> > +   rec->no_retries = 0;
> >     msg->info.avd = 0;
> >
> >     /* push the record to the AvD msg list */ @@ -1402,8 +1460,12 @@
> > uint32_t avnd_diq_rec_send(AVND_CB *cb,
> >
> >     /* start the msg response timer */
> >     if (NCSCC_RC_SUCCESS == rc) {
> > +           rec->no_retries++;
> >             if (rec->msg.info.avd->msg_type ==
> > AVSV_N2D_NODE_UP_MSG)
> >                     m_AVND_TMR_MSG_RESP_START(cb, *rec, rc);
> > +           if (rec->msg.info.avd->msg_type ==
> > AVSV_N2D_NODE_DOWN_MSG) {
> > +                   m_AVND_TMR_MSG_RESP_START(cb, *rec, rc);
> > +           }
> >             msg.info.avd = 0;
> >     }
> >
> > diff --git a/src/amf/amfnd/mds.cc b/src/amf/amfnd/mds.cc
> > --- a/src/amf/amfnd/mds.cc
> > +++ b/src/amf/amfnd/mds.cc
> > @@ -1417,7 +1417,7 @@ uint32_t avnd_mds_send(AVND_CB *cb, AVND
> >     case AVND_MSG_AVD:
> >             send_info->i_to_svc = NCSMDS_SVC_ID_AVD;
> >             /* Don't send any messages if we are shutting down */
> > -           if (m_AVND_IS_SHUTTING_DOWN(cb)) {
> > +           if (m_AVND_IS_SHUTTING_DOWN(cb) && msg->info.avd-
> > >msg_type != AVSV_N2D_NODE_DOWN_MSG) {
> >                     TRACE_1("Shutting down, not sending msg to
> AMFD.");
> >                     goto done;
> >             }
> > diff --git a/src/amf/amfnd/susm.cc b/src/amf/amfnd/susm.cc
> > --- a/src/amf/amfnd/susm.cc
> > +++ b/src/amf/amfnd/susm.cc
> > @@ -1102,7 +1102,11 @@ uint32_t avnd_su_si_oper_done(AVND_CB *c
> >                             }
> >                     } else {
> >                             LOG_NO("Removed assignments from AMF
> components");
> > -                           avnd_last_step_clean(cb);
> > +                           if (m_NCS_NODE_ID_FROM_MDS_DEST(cb-
> > >active_avd_adest) != ncs_get_node_id()) {
> > +                                   avnd_last_step_clean(cb);
> > +                           } else {
> > +                                   avnd_di_node_down_msg_send(cb);
> > +                           }
> >                     }
> >             }
> >     }
> > @@ -1168,7 +1172,11 @@ uint32_t avnd_su_si_oper_done(AVND_CB *c
> >                     }
> >             } else {
> >                     LOG_NO("Removed assignments from AMF
> components");
> > -                   avnd_last_step_clean(cb);
> > +                   if (m_NCS_NODE_ID_FROM_MDS_DEST(cb-
> > >active_avd_adest) != ncs_get_node_id()) {
> > +                           avnd_last_step_clean(cb);
> > +                   } else {
> > +                           avnd_di_node_down_msg_send(cb);
> > +                   }
> >             }
> >     }
> >
> > diff --git a/src/amf/amfnd/term.cc b/src/amf/amfnd/term.cc
> > --- a/src/amf/amfnd/term.cc
> > +++ b/src/amf/amfnd/term.cc
> > @@ -175,8 +175,13 @@ uint32_t avnd_evt_last_step_term_evh(AVN
> >     }
> >
> >  cleanup_components:
> > -   if (!si_removed)
> > -           avnd_last_step_clean(cb);
> > +   if (!si_removed) {
> > +           if (m_NCS_NODE_ID_FROM_MDS_DEST(cb-
> > >active_avd_adest) != ncs_get_node_id()) {
> > +                   avnd_last_step_clean(cb);
> > +           } else {
> > +                   avnd_di_node_down_msg_send(cb);
> > +           }
> > +   }
> >  done:
> >     TRACE_LEAVE();
> >     return NCSCC_RC_SUCCESS;

------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to