Hi Minh,

1. While stopping a payload, amfnd sends node_down to the Active controller.
This is not required, as 'payload stop' is not going to affect the controller.
2. While testing backward compatibility, I kept SC-1 and SC-2 on the old
version (without your patch) and ran the payload with your patch.
When I issued 'opensafd stop' on PL-3, amfnd on PL-3 couldn't exit. Below are
the logs.
        - This patch should be restricted to 'opensafd stop' on a controller,
and only on the Active controller.
        - Also, please check backward compatibility, i.e. an old Active
controller with a new payload.

Mar 30 13:57:14 PM_PL-3 opensafd: Stopping OpenSAF Services
Mar 30 13:57:14 PM_PL-3 osafamfnd[11305]: NO Shutdown initiated
Mar 30 13:57:25 PM_PL-3 osafamfnd[11305]: WA Node Down timer retries is over
Mar 30 13:57:25 PM_PL-3 osafamfnd[11305]: NO Terminating all AMF components
Mar 30 13:57:25 PM_PL-3 osafmsgnd[11354]: exiting for shutdown
Mar 30 13:57:25 PM_PL-3 osafimmnd[11295]: NO Implementer locally disconnected. 
Marking it as doomed 15 <53, 2030f> (MsgQueueService131855)
Mar 30 13:57:25 PM_PL-3 osafimmnd[11295]: NO Implementer disconnected 15 <53, 
2030f> (MsgQueueService131855)
Mar 30 13:57:25 PM_PL-3 osafsmfnd[11365]: exiting for shutdown
Mar 30 13:57:25 PM_PL-3 osafamfwd[11316]: exiting for shutdown
Mar 30 13:57:25 PM_PL-3 osafckptnd[11326]: exiting for shutdown
Mar 30 13:57:25 PM_PL-3 osaflcknd[11336]: exiting for shutdown
Mar 30 13:57:25 PM_PL-3 osafclmna[11285]: exiting for shutdown
Mar 30 13:57:25 PM_PL-3 osafimmnd[11295]: exiting for shutdown
Mar 30 13:58:14 PM_PL-3 opensafd: amfnd has not yet exited, killing it forcibly.
 
Thanks
-Nagu

> -----Original Message-----
> From: Minh Hon Chau [mailto:[email protected]]
> Sent: 23 March 2017 10:29
> To: [email protected]; Nagendra Kumar; Praveen Malviya;
> [email protected]; [email protected]
> Cc: [email protected]
> Subject: [PATCH 2 of 3] AMF: Handle node_down message [#2376]
> 
>  src/amf/amfd/main.cc      |   2 +-
>  src/amf/amfd/ndfsm.cc     |  61
> +++++++++++++++++++++++++++++++++++++++++++++++
>  src/amf/amfd/proc.h       |   1 +
>  src/amf/amfnd/avnd_defs.h |   2 +
>  src/amf/amfnd/avnd_di.h   |   1 +
>  src/amf/amfnd/avnd_mds.h  |   1 +
>  src/amf/amfnd/di.cc       |  61
> ++++++++++++++++++++++++++++++++++++++++++++++-
>  src/amf/amfnd/mds.cc      |   2 +-
>  src/amf/amfnd/susm.cc     |  12 +++++++-
>  src/amf/amfnd/term.cc     |   9 +++++-
>  10 files changed, 145 insertions(+), 7 deletions(-)
> 
> 
> This patch is how amfnd and amfd handles node_down message.
> Before amfnd enters component termination, amfnd sends
> node_down message to amfd, a timer is started.
> In amfd, upon reception of node_down message, amfd will try
> to execute all of its pending jobs. A node ack is sent if amfd
> finish all its jobs.
> If the timer is expired or amfnd receives node ack message for
> NODE_DOWN_MSG, amfnd will continue its component termination
> phase
> 
> diff --git a/src/amf/amfd/main.cc b/src/amf/amfd/main.cc
> --- a/src/amf/amfd/main.cc
> +++ b/src/amf/amfd/main.cc
> @@ -100,7 +100,7 @@ static const AVD_EVT_HDLR g_actv_list[AV
>       avd_pg_trk_act_evh,      /* AVD_EVT_PG_TRACK_ACT_MSG */
>       avd_oper_req_evh,        /* AVD_EVT_OPERATION_REQUEST_MSG */
>       avd_data_update_req_evh, /* AVD_EVT_DATA_REQUEST_MSG */
> -     invalid_evh,         /* AVD_EVT_NODE_DOWN_MSG */
> +     avd_node_down_evh,         /* AVD_EVT_NODE_DOWN_MSG */
>       avd_ack_nack_evh,            /* AVD_EVT_VERIFY_ACK_NACK_MSG */
>       avd_comp_validation_evh, /* AVD_EVT_COMP_VALIDATION_MSG
> */
>       avd_nd_sisu_state_info_evh,       /*
> AVD_EVT_ND_SISU_STATE_INFO_MSG */
> diff --git a/src/amf/amfd/ndfsm.cc b/src/amf/amfd/ndfsm.cc
> --- a/src/amf/amfd/ndfsm.cc
> +++ b/src/amf/amfd/ndfsm.cc
> @@ -531,6 +531,67 @@ done:
>  }
> 
> 
> /*************************************************************
> ****************
> + * Function: avd_node_down_evh
> + *
> + * Purpose:  This function is the handler for node down event indicating
> + * the arrival of the node_down message. AMFND sends this message when
> + * AMFND is going to terminate OpenSAF SU(s), who are providing services
> + * that AMFD may need. When AMFD receives this message, AMFD currently
> + * will execute all pending IMM update jobs to avoid a loss of IMM data
> + *
> + * Input: cb - the AVD control block
> + *        evt - The event information.
> + *
> + * Returns: None.
> + *
> + * NOTES:
> + *
> + *
> +
> **************************************************************
> ************/
> +void avd_node_down_evh(AVD_CL_CB *cb, AVD_EVT *evt)
> +{
> +     AVD_DND_MSG *n2d_msg = evt->info.avnd_msg;
> +     AVD_AVND *node = nullptr;
> +
> +     TRACE_ENTER2("from nodeId=0x%x", n2d_msg-
> >msg_info.n2d_node_down_info.node_id);
> +
> +     if (evt->info.avnd_msg->msg_type !=
> AVSV_N2D_NODE_DOWN_MSG) {
> +             LOG_WA("%s: wrong message type (%u)",
> __FUNCTION__,evt->info.avnd_msg->msg_type);
> +             goto done;
> +     }
> +
> +     if ((node = avd_node_find_nodeid(n2d_msg-
> >msg_info.n2d_node_down_info.node_id)) == nullptr) {
> +             LOG_WA("%s: invalid node ID (%x)", __FUNCTION__,
> n2d_msg->msg_info.n2d_node_down_info.node_id);
> +             goto done;
> +     }
> +
> +     if ((node->rcv_msg_id + 1) == n2d_msg-
> >msg_info.n2d_node_down_info.msg_id)
> +             m_AVD_SET_AVND_RCV_ID(cb, node, (n2d_msg-
> >msg_info.n2d_node_down_info.msg_id));
> +
> +     // try to execute all pending jobs
> +     AvdJobDequeueResultT ret = JOB_EXECUTED;
> +     while (Fifo::size() > 0) {
> +             ret = Fifo::execute(cb);
> +             if (ret != JOB_EXECUTED) {
> +                     LOG_WA("AMFD has (%d) pending jobs not being
> executed", Fifo::size());
> +                     break;
> +             }
> +     }
> +     if (ret == JOB_EXECUTED) {
> +             // send ack for node_down message to amfnd, so amfnd can
> continue termination phase
> +             if (avd_snd_node_ack_msg(cb, node, n2d_msg-
> >msg_info.n2d_node_down_info.msg_id) != NCSCC_RC_SUCCESS) {
> +                     /* log error that the director is not able to send the
> message */
> +                     LOG_ER("%s:%u: %u", __FILE__, __LINE__, node-
> >node_info.nodeId);
> +             }
> +     }
> +
> +done:
> +     avsv_dnd_msg_free(n2d_msg);
> +     evt->info.avnd_msg = nullptr;
> +     TRACE_LEAVE();
> +}
> +
> +/************************************************************
> *****************
>   * Function: avd_nd_ncs_su_assigned
>   *
>   * Purpose:  This function is the handler for node director event when a
> diff --git a/src/amf/amfd/proc.h b/src/amf/amfd/proc.h
> --- a/src/amf/amfd/proc.h
> +++ b/src/amf/amfd/proc.h
> @@ -64,6 +64,7 @@ uint32_t avd_evt_queue_count(AVD_CL_CB *
>  uint32_t avd_count_sync_node_size(AVD_CL_CB *cb);
>  void avd_process_state_info_queue(AVD_CL_CB *cb);
>  void avd_node_up_evh(AVD_CL_CB *cb, struct AVD_EVT *evt);
> +void avd_node_down_evh(AVD_CL_CB *cb, struct AVD_EVT *evt);
>  void avd_reg_su_evh(AVD_CL_CB *cb, struct AVD_EVT *evt);
>  void avd_oper_req_evh(AVD_CL_CB *cb, struct AVD_EVT *evt);
>  void avd_mds_avnd_up_evh(AVD_CL_CB *cb, struct AVD_EVT *evt);
> diff --git a/src/amf/amfnd/avnd_defs.h b/src/amf/amfnd/avnd_defs.h
> --- a/src/amf/amfnd/avnd_defs.h
> +++ b/src/amf/amfnd/avnd_defs.h
> @@ -62,6 +62,8 @@
>  #define AVND_COMP_CBK_RESP_TIME       5000   /* time out callback
> response */
>  #define AVND_AVD_MSG_RESP_TIME   1000        /* time out AvD message
> response */
> 
> +#define AVND_NODE_DOWN_MAX_RETRY     10 /* max retries on waiting
> for ack of node_down msg */
> +
>  #define m_AVND_STACKSIZE       NCS_STACKSIZE_HUGE
> 
>  typedef enum {
> diff --git a/src/amf/amfnd/avnd_di.h b/src/amf/amfnd/avnd_di.h
> --- a/src/amf/amfnd/avnd_di.h
> +++ b/src/amf/amfnd/avnd_di.h
> @@ -82,6 +82,7 @@ void avnd_diq_rec_del(struct avnd_cb_tag
>  void avnd_diq_rec_send_buffered_msg(struct avnd_cb_tag *cb);
>  uint32_t avnd_diq_rec_send(struct avnd_cb_tag *cb, AVND_DND_MSG_LIST
> *rec);
>  uint32_t avnd_di_reg_su_rsp_snd(struct avnd_cb_tag *cb, const std::string&
> su_name, uint32_t ret_code);
> +uint32_t avnd_di_node_down_msg_send(struct avnd_cb_tag *cb);
>  uint32_t avnd_di_ack_nack_msg_send(struct avnd_cb_tag *cb, uint32_t
> rcv_id, uint32_t view_num);
>  extern void avnd_di_uns32_upd_send(int class_id, int attr_id, const
> std::string& dn, uint32_t value);
>  extern uint32_t avnd_di_resend_pg_start_track(struct avnd_cb_tag *);
> diff --git a/src/amf/amfnd/avnd_mds.h b/src/amf/amfnd/avnd_mds.h
> --- a/src/amf/amfnd/avnd_mds.h
> +++ b/src/amf/amfnd/avnd_mds.h
> @@ -66,6 +66,7 @@ typedef struct avnd_dnd_msg_list_tag {
>       AVND_MSG msg;
>       AVND_TMR resp_tmr;
>       uint32_t opq_hdl;
> +     uint16_t no_retries;
>       struct avnd_dnd_msg_list_tag *next;
>  } AVND_DND_MSG_LIST;
> 
> diff --git a/src/amf/amfnd/di.cc b/src/amf/amfnd/di.cc
> --- a/src/amf/amfnd/di.cc
> +++ b/src/amf/amfnd/di.cc
> @@ -437,7 +437,22 @@ uint32_t avnd_evt_tmr_rcv_msg_rsp_evh(AV
>       if ((0 == (rec = (AVND_DND_MSG_LIST
> *)ncshm_take_hdl(NCS_SERVICE_ID_AVND, tmr->opq_hdl))))
>               goto done;
> 
> -     rc = avnd_diq_rec_send(cb, rec);
> +     /* Resend on time out if it's NODE_UP msg only */
> +     if (rec->msg.info.avd->msg_type == AVSV_N2D_NODE_UP_MSG) {
> +             rc = avnd_diq_rec_send(cb, rec);
> +     } else if (rec->msg.info.avd->msg_type ==
> AVSV_N2D_NODE_DOWN_MSG) {
> +                     if (rec->no_retries <
> AVND_NODE_DOWN_MAX_RETRY) {
> +                             rc = avnd_diq_rec_send(cb, rec);
> +                     } else {
> +                             LOG_WA("Node Down timer retries is over");
> +                             avnd_last_step_clean(cb);
> +                             m_AVND_DIQ_REC_FIND_POP(cb, rec);
> +                             avnd_diq_rec_del(cb, rec);
> +                     }
> +     } else {
> +             m_AVND_DIQ_REC_FIND_POP(cb, rec);
> +             avnd_diq_rec_del(cb, rec);
> +     }
> 
>       ncshm_give_hdl(tmr->opq_hdl);
> 
> @@ -1159,6 +1174,39 @@ uint32_t avnd_di_reg_su_rsp_snd(AVND_CB
>  }
> 
> 
> /*************************************************************
> ***************
> +  Name          : avnd_di_node_down_msg_send
> +
> +  Description   : This routine sends node_down message to active amf
> director.
> +
> +  Arguments     : cb  - ptr to the AvND control block
> +
> +  Return Values : NCSCC_RC_SUCCESS/NCSCC_RC_FAILURE
> +
> +  Notes         : None.
> +*************************************************************
> *****************/
> +uint32_t avnd_di_node_down_msg_send(AVND_CB *cb)
> +{
> +     AVND_MSG msg;
> +     uint32_t rc = NCSCC_RC_SUCCESS;
> +     TRACE_ENTER();
> +     memset(&msg, 0, sizeof(AVND_MSG));
> +     msg.info.avd = static_cast<AVSV_DND_MSG*>(calloc(1,
> sizeof(AVSV_DND_MSG)));
> +     msg.type = AVND_MSG_AVD;
> +     msg.info.avd->msg_type = AVSV_N2D_NODE_DOWN_MSG;
> +     msg.info.avd->msg_info.n2d_node_down_info.msg_id = ++(cb-
> >snd_msg_id);
> +     msg.info.avd->msg_info.n2d_node_down_info.node_id = cb-
> >node_info.nodeId;
> +     rc = avnd_di_msg_send(cb, &msg);
> +     if (rc == NCSCC_RC_SUCCESS) {
> +             msg.info.avd = 0;
> +     }
> +
> +     // free the contents of avnd message
> +     avnd_msg_content_free(cb, &msg);
> +     TRACE_LEAVE();
> +     return NCSCC_RC_SUCCESS;
> +}
> +
> +/************************************************************
> ****************
>    Name          : avnd_di_msg_ack_process
> 
>    Description   : This routine processes the the acks that are generated by
> @@ -1179,6 +1227,12 @@ void avnd_di_msg_ack_process(AVND_CB *cb
>       /* find & pop the matching record */
>       m_AVND_DIQ_REC_FIND(cb, mid, rec);
>       if (rec) {
> +             if (rec->msg.info.avd->msg_type ==
> AVSV_N2D_NODE_DOWN_MSG) {
> +                     // first to stop timer to avoid processing timeout
> event
> +                     // then perform last step clean up
> +                     avnd_stop_tmr(cb, &rec->resp_tmr);
> +                     avnd_last_step_clean(cb);
> +             }
>               m_AVND_DIQ_REC_FIND_POP(cb, rec);
>               avnd_diq_rec_del(cb, rec);
>       }
> @@ -1240,6 +1294,7 @@ AVND_DND_MSG_LIST *avnd_diq_rec_add(AVND
>       /* store the msg (transfer memory ownership) */
>       rec->msg.type = msg->type;
>       rec->msg.info.avd = msg->info.avd;
> +     rec->no_retries = 0;
>       msg->info.avd = 0;
> 
>       /* push the record to the AvD msg list */
> @@ -1402,8 +1457,12 @@ uint32_t avnd_diq_rec_send(AVND_CB *cb,
> 
>       /* start the msg response timer */
>       if (NCSCC_RC_SUCCESS == rc) {
> +             rec->no_retries++;
>               if (rec->msg.info.avd->msg_type ==
> AVSV_N2D_NODE_UP_MSG)
>                       m_AVND_TMR_MSG_RESP_START(cb, *rec, rc);
> +             if (rec->msg.info.avd->msg_type ==
> AVSV_N2D_NODE_DOWN_MSG) {
> +                     m_AVND_TMR_MSG_RESP_START(cb, *rec, rc);
> +             }
>               msg.info.avd = 0;
>       }
> 
> diff --git a/src/amf/amfnd/mds.cc b/src/amf/amfnd/mds.cc
> --- a/src/amf/amfnd/mds.cc
> +++ b/src/amf/amfnd/mds.cc
> @@ -1417,7 +1417,7 @@ uint32_t avnd_mds_send(AVND_CB *cb, AVND
>       case AVND_MSG_AVD:
>               send_info->i_to_svc = NCSMDS_SVC_ID_AVD;
>               /* Don't send any messages if we are shutting down */
> -             if (m_AVND_IS_SHUTTING_DOWN(cb)) {
> +             if (m_AVND_IS_SHUTTING_DOWN(cb) && msg->info.avd-
> >msg_type != AVSV_N2D_NODE_DOWN_MSG) {
>                       TRACE_1("Shutting down, not sending msg to
> AMFD.");
>                       goto done;
>               }
> diff --git a/src/amf/amfnd/susm.cc b/src/amf/amfnd/susm.cc
> --- a/src/amf/amfnd/susm.cc
> +++ b/src/amf/amfnd/susm.cc
> @@ -1102,7 +1102,11 @@ uint32_t avnd_su_si_oper_done(AVND_CB *c
>                               }
>                       } else {
>                               LOG_NO("Removed assignments from AMF
> components");
> -                             avnd_last_step_clean(cb);
> +                             if (cb->is_avd_down == true) {
> +                                     avnd_last_step_clean(cb);
> +                             } else {
> +                                     avnd_di_node_down_msg_send(cb);
> +                             }
>                       }
>               }
>       }
> @@ -1168,7 +1172,11 @@ uint32_t avnd_su_si_oper_done(AVND_CB *c
>                       }
>               } else {
>                       LOG_NO("Removed assignments from AMF
> components");
> -                     avnd_last_step_clean(cb);
> +                     if (cb->is_avd_down == true) {
> +                             avnd_last_step_clean(cb);
> +                     } else {
> +                             avnd_di_node_down_msg_send(cb);
> +                     }
>               }
>       }
> 
> diff --git a/src/amf/amfnd/term.cc b/src/amf/amfnd/term.cc
> --- a/src/amf/amfnd/term.cc
> +++ b/src/amf/amfnd/term.cc
> @@ -175,8 +175,13 @@ uint32_t avnd_evt_last_step_term_evh(AVN
>       }
> 
>  cleanup_components:
> -     if (!si_removed)
> -             avnd_last_step_clean(cb);
> +     if (!si_removed) {
> +             if (cb->is_avd_down == true) {
> +                     avnd_last_step_clean(cb);
> +             } else {
> +                     avnd_di_node_down_msg_send(cb);
> +             }
> +     }
>  done:
>       TRACE_LEAVE();
>       return NCSCC_RC_SUCCESS;

------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to