Sure, I will wait until after the GA release. Thanks, Minh > But, please push it after GA release. > > Thanks > -Nagu > >> -----Original Message----- >> From: Nagendra Kumar >> Sent: 05 April 2017 14:38 >> To: Minh Hon Chau; [email protected]; Praveen Malviya; >> [email protected] >> Cc: [email protected] >> Subject: RE: [PATCH 2 of 3] AMF: Handle node_down message [#2376] V2 >> >> Ack for all the patches, tested. >> >> Thanks >> -Nagu >> >> > -----Original Message----- >> > From: Minh Hon Chau [mailto:[email protected]] >> > Sent: 31 March 2017 06:04 >> > To: [email protected]; Nagendra Kumar; Praveen Malviya; >> > [email protected]; [email protected] >> > Cc: [email protected] >> > Subject: [PATCH 2 of 3] AMF: Handle node_down message [#2376] V2 >> > >> > src/amf/amfd/main.cc | 2 +- >> > src/amf/amfd/ndfsm.cc | 61 >> > +++++++++++++++++++++++++++++++++++++++++++ >> > src/amf/amfd/proc.h | 1 + >> > src/amf/amfnd/avnd_defs.h | 2 + >> > src/amf/amfnd/avnd_di.h | 1 + >> > src/amf/amfnd/avnd_mds.h | 1 + >> > src/amf/amfnd/di.cc | 66 >> > +++++++++++++++++++++++++++++++++++++++++++++- >> > src/amf/amfnd/mds.cc | 2 +- >> > src/amf/amfnd/susm.cc | 12 +++++++- >> > src/amf/amfnd/term.cc | 9 ++++- >> > 10 files changed, 149 insertions(+), 8 deletions(-) >> > >> > >> > This patch implements how amfnd and amfd handle the node_down message. >> > Before amfnd enters component termination, amfnd sends a node_down >> > message to amfd and starts a timer. >> > In amfd, upon reception of the node_down message, amfd will try to execute >> > all of its pending jobs. A node ack is sent if amfd finishes all its >> > jobs. 
>> > If the timer is expired or amfnd receives node ack message for >> > NODE_DOWN_MSG, amfnd will continue its component termination phase >> > >> > diff --git a/src/amf/amfd/main.cc b/src/amf/amfd/main.cc >> > --- a/src/amf/amfd/main.cc >> > +++ b/src/amf/amfd/main.cc >> > @@ -100,7 +100,7 @@ static const AVD_EVT_HDLR g_actv_list[AV >> > avd_pg_trk_act_evh, /* AVD_EVT_PG_TRACK_ACT_MSG */ >> > avd_oper_req_evh, /* AVD_EVT_OPERATION_REQUEST_MSG */ >> > avd_data_update_req_evh, /* AVD_EVT_DATA_REQUEST_MSG */ >> > - invalid_evh, /* AVD_EVT_NODE_DOWN_MSG */ >> > + avd_node_down_evh, /* AVD_EVT_NODE_DOWN_MSG */ >> > avd_ack_nack_evh, /* AVD_EVT_VERIFY_ACK_NACK_MSG */ >> > avd_comp_validation_evh, /* AVD_EVT_COMP_VALIDATION_MSG >> */ >> > avd_nd_sisu_state_info_evh, /* >> > AVD_EVT_ND_SISU_STATE_INFO_MSG */ >> > diff --git a/src/amf/amfd/ndfsm.cc b/src/amf/amfd/ndfsm.cc >> > --- a/src/amf/amfd/ndfsm.cc >> > +++ b/src/amf/amfd/ndfsm.cc >> > @@ -531,6 +531,67 @@ done: >> > } >> > >> > >> > >> /************************************************************* >> > **************** >> > + * Function: avd_node_down_evh >> > + * >> > + * Purpose: This function is the handler for node down event >> > + indicating >> > + * the arrival of the node_down message. AMFND sends this message >> > + when >> > + * AMFND is going to terminate OpenSAF SU(s), who are providing >> > + services >> > + * that AMFD may need. When AMFD receives this message, AMFD >> > + currently >> > + * will execute all pending IMM update jobs to avoid a loss of IMM >> > + data >> > + * >> > + * Input: cb - the AVD control block >> > + * evt - The event information. >> > + * >> > + * Returns: None. 
>> > + * >> > + * NOTES: >> > + * >> > + * >> > + >> > >> ************************************************************** >> > ************/ >> > +void avd_node_down_evh(AVD_CL_CB *cb, AVD_EVT *evt) { >> > + AVD_DND_MSG *n2d_msg = evt->info.avnd_msg; >> > + AVD_AVND *node = nullptr; >> > + >> > + TRACE_ENTER2("from nodeId=0x%x", n2d_msg- >> > >msg_info.n2d_node_down_info.node_id); >> > + >> > + if (evt->info.avnd_msg->msg_type != >> > AVSV_N2D_NODE_DOWN_MSG) { >> > + LOG_WA("%s: wrong message type (%u)", >> > __FUNCTION__,evt->info.avnd_msg->msg_type); >> > + goto done; >> > + } >> > + >> > + if ((node = avd_node_find_nodeid(n2d_msg- >> > >msg_info.n2d_node_down_info.node_id)) == nullptr) { >> > + LOG_WA("%s: invalid node ID (%x)", __FUNCTION__, >> > n2d_msg->msg_info.n2d_node_down_info.node_id); >> > + goto done; >> > + } >> > + >> > + if ((node->rcv_msg_id + 1) == n2d_msg- >> > >msg_info.n2d_node_down_info.msg_id) >> > + m_AVD_SET_AVND_RCV_ID(cb, node, (n2d_msg- >> > >msg_info.n2d_node_down_info.msg_id)); >> > + >> > + // try to execute all pending jobs >> > + AvdJobDequeueResultT ret = JOB_EXECUTED; >> > + while (Fifo::size() > 0) { >> > + ret = Fifo::execute(cb); >> > + if (ret != JOB_EXECUTED) { >> > + LOG_WA("AMFD has (%d) pending jobs not being >> > executed", Fifo::size()); >> > + break; >> > + } >> > + } >> > + if (ret == JOB_EXECUTED) { >> > + // send ack for node_down message to amfnd, so amfnd can >> > continue termination phase >> > + if (avd_snd_node_ack_msg(cb, node, n2d_msg- >> > >msg_info.n2d_node_down_info.msg_id) != NCSCC_RC_SUCCESS) { >> > + /* log error that the director is not able to send the >> > message */ >> > + LOG_ER("%s:%u: %u", __FILE__, __LINE__, node- >> > >node_info.nodeId); >> > + } >> > + } >> > + >> > +done: >> > + avsv_dnd_msg_free(n2d_msg); >> > + evt->info.avnd_msg = nullptr; >> > + TRACE_LEAVE(); >> > +} >> > + >> > >> +/************************************************************ >> > ***************** >> > * Function: 
avd_nd_ncs_su_assigned >> > * >> > * Purpose: This function is the handler for node director event >> > when a diff --git a/src/amf/amfd/proc.h b/src/amf/amfd/proc.h >> > --- a/src/amf/amfd/proc.h >> > +++ b/src/amf/amfd/proc.h >> > @@ -64,6 +64,7 @@ uint32_t avd_evt_queue_count(AVD_CL_CB * >> uint32_t >> > avd_count_sync_node_size(AVD_CL_CB *cb); void >> > avd_process_state_info_queue(AVD_CL_CB *cb); void >> > avd_node_up_evh(AVD_CL_CB *cb, struct AVD_EVT *evt); >> > +void avd_node_down_evh(AVD_CL_CB *cb, struct AVD_EVT *evt); >> > void avd_reg_su_evh(AVD_CL_CB *cb, struct AVD_EVT *evt); void >> > avd_oper_req_evh(AVD_CL_CB *cb, struct AVD_EVT *evt); void >> > avd_mds_avnd_up_evh(AVD_CL_CB *cb, struct AVD_EVT *evt); diff --git >> > a/src/amf/amfnd/avnd_defs.h b/src/amf/amfnd/avnd_defs.h >> > --- a/src/amf/amfnd/avnd_defs.h >> > +++ b/src/amf/amfnd/avnd_defs.h >> > @@ -62,6 +62,8 @@ >> > #define AVND_COMP_CBK_RESP_TIME 5000 /* time out callback >> > response */ >> > #define AVND_AVD_MSG_RESP_TIME 1000 /* time out AvD message >> > response */ >> > >> > +#define AVND_NODE_DOWN_MAX_RETRY 10 /* max retries on waiting >> > for ack of node_down msg */ >> > + >> > #define m_AVND_STACKSIZE NCS_STACKSIZE_HUGE >> > >> > typedef enum { >> > diff --git a/src/amf/amfnd/avnd_di.h b/src/amf/amfnd/avnd_di.h >> > --- a/src/amf/amfnd/avnd_di.h >> > +++ b/src/amf/amfnd/avnd_di.h >> > @@ -82,6 +82,7 @@ void avnd_diq_rec_del(struct avnd_cb_tag void >> > avnd_diq_rec_send_buffered_msg(struct avnd_cb_tag *cb); uint32_t >> > avnd_diq_rec_send(struct avnd_cb_tag *cb, AVND_DND_MSG_LIST *rec); >> > uint32_t avnd_di_reg_su_rsp_snd(struct avnd_cb_tag *cb, const >> > std::string& su_name, uint32_t ret_code); >> > +uint32_t avnd_di_node_down_msg_send(struct avnd_cb_tag *cb); >> > uint32_t avnd_di_ack_nack_msg_send(struct avnd_cb_tag *cb, uint32_t >> > rcv_id, uint32_t view_num); extern void avnd_di_uns32_upd_send(int >> > class_id, int attr_id, const std::string& dn, uint32_t value); extern >> > 
uint32_t avnd_di_resend_pg_start_track(struct avnd_cb_tag *); diff >> > --git a/src/amf/amfnd/avnd_mds.h b/src/amf/amfnd/avnd_mds.h >> > --- a/src/amf/amfnd/avnd_mds.h >> > +++ b/src/amf/amfnd/avnd_mds.h >> > @@ -66,6 +66,7 @@ typedef struct avnd_dnd_msg_list_tag { >> > AVND_MSG msg; >> > AVND_TMR resp_tmr; >> > uint32_t opq_hdl; >> > + uint16_t no_retries; >> > struct avnd_dnd_msg_list_tag *next; >> > } AVND_DND_MSG_LIST; >> > >> > diff --git a/src/amf/amfnd/di.cc b/src/amf/amfnd/di.cc >> > --- a/src/amf/amfnd/di.cc >> > +++ b/src/amf/amfnd/di.cc >> > @@ -430,17 +430,35 @@ uint32_t avnd_evt_tmr_rcv_msg_rsp_evh(AV >> > AVND_TMR_EVT *tmr = &evt->info.tmr; >> > AVND_DND_MSG_LIST *rec = 0; >> > uint32_t rc = NCSCC_RC_SUCCESS; >> > - >> > + bool rec_tobe_deleted = false; >> > TRACE_ENTER(); >> > >> > /* retrieve the message record */ >> > if ((0 == (rec = (AVND_DND_MSG_LIST >> > *)ncshm_take_hdl(NCS_SERVICE_ID_AVND, tmr->opq_hdl)))) >> > goto done; >> > >> > - rc = avnd_diq_rec_send(cb, rec); >> > + /* Resend on time out if it's NODE_UP msg only */ >> > + if (rec->msg.info.avd->msg_type == AVSV_N2D_NODE_UP_MSG) { >> > + rc = avnd_diq_rec_send(cb, rec); >> > + } else if (rec->msg.info.avd->msg_type == >> > AVSV_N2D_NODE_DOWN_MSG) { >> > + if (rec->no_retries < AVND_NODE_DOWN_MAX_RETRY) { >> > + rc = avnd_diq_rec_send(cb, rec); >> > + } else { >> > + LOG_WA("Node Down timer retries is over"); >> > + avnd_last_step_clean(cb); >> > + rec_tobe_deleted = true; >> > + } >> > + } else { >> > + LOG_WA("Unexpected message response timeout with >> > msg_type(%u)", rec->msg.info.avd->msg_type); >> > + rec_tobe_deleted = true; >> > + } >> > >> > ncshm_give_hdl(tmr->opq_hdl); >> > >> > + if (rec_tobe_deleted) { >> > + m_AVND_DIQ_REC_FIND_POP(cb, rec); >> > + avnd_diq_rec_del(cb, rec); >> > + } >> > done: >> > TRACE_LEAVE(); >> > return rc; >> > @@ -1159,6 +1177,39 @@ uint32_t avnd_di_reg_su_rsp_snd(AVND_CB } >> > >> > >> > >> 
/************************************************************* >> > *************** >> > + Name : avnd_di_node_down_msg_send >> > + >> > + Description : This routine sends node_down message to active amf >> > director. >> > + >> > + Arguments : cb - ptr to the AvND control block >> > + >> > + Return Values : NCSCC_RC_SUCCESS/NCSCC_RC_FAILURE >> > + >> > + Notes : None. >> > >> +************************************************************* >> > *****************/ >> > +uint32_t avnd_di_node_down_msg_send(AVND_CB *cb) { >> > + AVND_MSG msg; >> > + uint32_t rc = NCSCC_RC_SUCCESS; >> > + TRACE_ENTER(); >> > + memset(&msg, 0, sizeof(AVND_MSG)); >> > + msg.info.avd = static_cast<AVSV_DND_MSG*>(calloc(1, >> > sizeof(AVSV_DND_MSG))); >> > + msg.type = AVND_MSG_AVD; >> > + msg.info.avd->msg_type = AVSV_N2D_NODE_DOWN_MSG; >> > + msg.info.avd->msg_info.n2d_node_down_info.msg_id = ++(cb- >> > >snd_msg_id); >> > + msg.info.avd->msg_info.n2d_node_down_info.node_id = cb- >> > >node_info.nodeId; >> > + rc = avnd_di_msg_send(cb, &msg); >> > + if (rc == NCSCC_RC_SUCCESS) { >> > + msg.info.avd = 0; >> > + } >> > + >> > + // free the contents of avnd message >> > + avnd_msg_content_free(cb, &msg); >> > + TRACE_LEAVE(); >> > + return NCSCC_RC_SUCCESS; >> > +} >> > + >> > >> +/************************************************************ >> > **************** >> > Name : avnd_di_msg_ack_process >> > >> > Description : This routine processes the the acks that are >> generated by >> > @@ -1179,6 +1230,12 @@ void avnd_di_msg_ack_process(AVND_CB *cb >> > /* find & pop the matching record */ >> > m_AVND_DIQ_REC_FIND(cb, mid, rec); >> > if (rec) { >> > + if (rec->msg.info.avd->msg_type == >> > AVSV_N2D_NODE_DOWN_MSG) { >> > + // first to stop timer to avoid processing timeout >> > event >> > + // then perform last step clean up >> > + avnd_stop_tmr(cb, &rec->resp_tmr); >> > + avnd_last_step_clean(cb); >> > + } >> > m_AVND_DIQ_REC_FIND_POP(cb, rec); >> > avnd_diq_rec_del(cb, rec); >> > } >> > 
@@ -1240,6 +1297,7 @@ AVND_DND_MSG_LIST >> *avnd_diq_rec_add(AVND >> > /* store the msg (transfer memory ownership) */ >> > rec->msg.type = msg->type; >> > rec->msg.info.avd = msg->info.avd; >> > + rec->no_retries = 0; >> > msg->info.avd = 0; >> > >> > /* push the record to the AvD msg list */ @@ -1402,8 +1460,12 @@ >> > uint32_t avnd_diq_rec_send(AVND_CB *cb, >> > >> > /* start the msg response timer */ >> > if (NCSCC_RC_SUCCESS == rc) { >> > + rec->no_retries++; >> > if (rec->msg.info.avd->msg_type == >> > AVSV_N2D_NODE_UP_MSG) >> > m_AVND_TMR_MSG_RESP_START(cb, *rec, rc); >> > + if (rec->msg.info.avd->msg_type == >> > AVSV_N2D_NODE_DOWN_MSG) { >> > + m_AVND_TMR_MSG_RESP_START(cb, *rec, rc); >> > + } >> > msg.info.avd = 0; >> > } >> > >> > diff --git a/src/amf/amfnd/mds.cc b/src/amf/amfnd/mds.cc >> > --- a/src/amf/amfnd/mds.cc >> > +++ b/src/amf/amfnd/mds.cc >> > @@ -1417,7 +1417,7 @@ uint32_t avnd_mds_send(AVND_CB *cb, AVND >> > case AVND_MSG_AVD: >> > send_info->i_to_svc = NCSMDS_SVC_ID_AVD; >> > /* Don't send any messages if we are shutting down */ >> > - if (m_AVND_IS_SHUTTING_DOWN(cb)) { >> > + if (m_AVND_IS_SHUTTING_DOWN(cb) && msg->info.avd- >> > >msg_type != AVSV_N2D_NODE_DOWN_MSG) { >> > TRACE_1("Shutting down, not sending msg to >> AMFD."); >> > goto done; >> > } >> > diff --git a/src/amf/amfnd/susm.cc b/src/amf/amfnd/susm.cc >> > --- a/src/amf/amfnd/susm.cc >> > +++ b/src/amf/amfnd/susm.cc >> > @@ -1102,7 +1102,11 @@ uint32_t avnd_su_si_oper_done(AVND_CB *c >> > } >> > } else { >> > LOG_NO("Removed assignments from AMF >> components"); >> > - avnd_last_step_clean(cb); >> > + if (m_NCS_NODE_ID_FROM_MDS_DEST(cb- >> > >active_avd_adest) != ncs_get_node_id()) { >> > + avnd_last_step_clean(cb); >> > + } else { >> > + avnd_di_node_down_msg_send(cb); >> > + } >> > } >> > } >> > } >> > @@ -1168,7 +1172,11 @@ uint32_t avnd_su_si_oper_done(AVND_CB *c >> > } >> > } else { >> > LOG_NO("Removed assignments from AMF >> components"); >> > - avnd_last_step_clean(cb); 
>> > + if (m_NCS_NODE_ID_FROM_MDS_DEST(cb- >> > >active_avd_adest) != ncs_get_node_id()) { >> > + avnd_last_step_clean(cb); >> > + } else { >> > + avnd_di_node_down_msg_send(cb); >> > + } >> > } >> > } >> > >> > diff --git a/src/amf/amfnd/term.cc b/src/amf/amfnd/term.cc >> > --- a/src/amf/amfnd/term.cc >> > +++ b/src/amf/amfnd/term.cc >> > @@ -175,8 +175,13 @@ uint32_t avnd_evt_last_step_term_evh(AVN >> > } >> > >> > cleanup_components: >> > - if (!si_removed) >> > - avnd_last_step_clean(cb); >> > + if (!si_removed) { >> > + if (m_NCS_NODE_ID_FROM_MDS_DEST(cb- >> > >active_avd_adest) != ncs_get_node_id()) { >> > + avnd_last_step_clean(cb); >> > + } else { >> > + avnd_di_node_down_msg_send(cb); >> > + } >> > + } >> > done: >> > TRACE_LEAVE(); >> > return NCSCC_RC_SUCCESS; >
------------------------------------------------------------------------------ Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot _______________________________________________ Opensaf-devel mailing list [email protected] https://lists.sourceforge.net/lists/listinfo/opensaf-devel
