Sure, I will wait until after the GA release. Thanks, Minh

> But please push it after the GA release.
>
> Thanks
> -Nagu
>
>> -----Original Message-----
>> From: Nagendra Kumar
>> Sent: 05 April 2017 14:38
>> To: Minh Hon Chau; [email protected]; Praveen Malviya;
>> [email protected]
>> Cc: [email protected]
>> Subject: RE: [PATCH 2 of 3] AMF: Handle node_down message [#2376] V2
>>
>> Ack for all the patches, tested.
>>
>> Thanks
>> -Nagu
>>
>> > -----Original Message-----
>> > From: Minh Hon Chau [mailto:[email protected]]
>> > Sent: 31 March 2017 06:04
>> > To: [email protected]; Nagendra Kumar; Praveen Malviya;
>> > [email protected]; [email protected]
>> > Cc: [email protected]
>> > Subject: [PATCH 2 of 3] AMF: Handle node_down message [#2376] V2
>> >
>> >  src/amf/amfd/main.cc      |   2 +-
>> >  src/amf/amfd/ndfsm.cc     |  61
>> > +++++++++++++++++++++++++++++++++++++++++++
>> >  src/amf/amfd/proc.h       |   1 +
>> >  src/amf/amfnd/avnd_defs.h |   2 +
>> >  src/amf/amfnd/avnd_di.h   |   1 +
>> >  src/amf/amfnd/avnd_mds.h  |   1 +
>> >  src/amf/amfnd/di.cc       |  66
>> > +++++++++++++++++++++++++++++++++++++++++++++-
>> >  src/amf/amfnd/mds.cc      |   2 +-
>> >  src/amf/amfnd/susm.cc     |  12 +++++++-
>> >  src/amf/amfnd/term.cc     |   9 ++++-
>> >  10 files changed, 149 insertions(+), 8 deletions(-)
>> >
>> >
>> > This patch implements how amfnd and amfd handle the node_down message.
>> > Before amfnd enters component termination, amfnd sends a node_down
>> > message to amfd, and a timer is started.
>> > In amfd, upon reception of the node_down message, amfd will try to execute
>> > all of its pending jobs. A node ack is sent if amfd finishes all its
>> > jobs.
>> > If the timer expires or amfnd receives the node ack message for
>> > NODE_DOWN_MSG, amfnd will continue its component termination phase.
>> >
>> > diff --git a/src/amf/amfd/main.cc b/src/amf/amfd/main.cc
>> > --- a/src/amf/amfd/main.cc
>> > +++ b/src/amf/amfd/main.cc
>> > @@ -100,7 +100,7 @@ static const AVD_EVT_HDLR g_actv_list[AV
>> >    avd_pg_trk_act_evh,      /* AVD_EVT_PG_TRACK_ACT_MSG */
>> >    avd_oper_req_evh,        /* AVD_EVT_OPERATION_REQUEST_MSG */
>> >    avd_data_update_req_evh, /* AVD_EVT_DATA_REQUEST_MSG */
>> > -  invalid_evh,         /* AVD_EVT_NODE_DOWN_MSG */
>> > +  avd_node_down_evh,         /* AVD_EVT_NODE_DOWN_MSG */
>> >    avd_ack_nack_evh,            /* AVD_EVT_VERIFY_ACK_NACK_MSG */
>> >    avd_comp_validation_evh, /* AVD_EVT_COMP_VALIDATION_MSG
>> */
>> >    avd_nd_sisu_state_info_evh,       /*
>> > AVD_EVT_ND_SISU_STATE_INFO_MSG */
>> > diff --git a/src/amf/amfd/ndfsm.cc b/src/amf/amfd/ndfsm.cc
>> > --- a/src/amf/amfd/ndfsm.cc
>> > +++ b/src/amf/amfd/ndfsm.cc
>> > @@ -531,6 +531,67 @@ done:
>> >  }
>> >
>> >
>> >
>> /*************************************************************
>> > ****************
>> > + * Function: avd_node_down_evh
>> > + *
>> > + * Purpose:  This function is the handler for node down event
>> > + indicating
>> > + * the arrival of the node_down message. AMFND sends this message
>> > + when
>> > + * AMFND is going to terminate OpenSAF SU(s), who are providing
>> > + services
>> > + * that AMFD may need. When AMFD receives this message, AMFD
>> > + currently
>> > + * will execute all pending IMM update jobs to avoid a loss of IMM
>> > + data
>> > + *
>> > + * Input: cb - the AVD control block
>> > + *        evt - The event information.
>> > + *
>> > + * Returns: None.
>> > + *
>> > + * NOTES:
>> > + *
>> > + *
>> > +
>> >
>> **************************************************************
>> > ************/
>> > +void avd_node_down_evh(AVD_CL_CB *cb, AVD_EVT *evt) {
>> > +  AVD_DND_MSG *n2d_msg = evt->info.avnd_msg;
>> > +  AVD_AVND *node = nullptr;
>> > +
>> > +  TRACE_ENTER2("from nodeId=0x%x", n2d_msg-
>> > >msg_info.n2d_node_down_info.node_id);
>> > +
>> > +  if (evt->info.avnd_msg->msg_type !=
>> > AVSV_N2D_NODE_DOWN_MSG) {
>> > +          LOG_WA("%s: wrong message type (%u)",
>> > __FUNCTION__,evt->info.avnd_msg->msg_type);
>> > +          goto done;
>> > +  }
>> > +
>> > +  if ((node = avd_node_find_nodeid(n2d_msg-
>> > >msg_info.n2d_node_down_info.node_id)) == nullptr) {
>> > +          LOG_WA("%s: invalid node ID (%x)", __FUNCTION__,
>> > n2d_msg->msg_info.n2d_node_down_info.node_id);
>> > +          goto done;
>> > +  }
>> > +
>> > +  if ((node->rcv_msg_id + 1) == n2d_msg-
>> > >msg_info.n2d_node_down_info.msg_id)
>> > +          m_AVD_SET_AVND_RCV_ID(cb, node, (n2d_msg-
>> > >msg_info.n2d_node_down_info.msg_id));
>> > +
>> > +  // try to execute all pending jobs
>> > +  AvdJobDequeueResultT ret = JOB_EXECUTED;
>> > +  while (Fifo::size() > 0) {
>> > +          ret = Fifo::execute(cb);
>> > +          if (ret != JOB_EXECUTED) {
>> > +                  LOG_WA("AMFD has (%d) pending jobs not being
>> > executed", Fifo::size());
>> > +                  break;
>> > +          }
>> > +  }
>> > +  if (ret == JOB_EXECUTED) {
>> > +          // send ack for node_down message to amfnd, so amfnd can
>> > continue termination phase
>> > +          if (avd_snd_node_ack_msg(cb, node, n2d_msg-
>> > >msg_info.n2d_node_down_info.msg_id) != NCSCC_RC_SUCCESS) {
>> > +                  /* log error that the director is not able to send the
>> > message */
>> > +                  LOG_ER("%s:%u: %u", __FILE__, __LINE__, node-
>> > >node_info.nodeId);
>> > +          }
>> > +  }
>> > +
>> > +done:
>> > +  avsv_dnd_msg_free(n2d_msg);
>> > +  evt->info.avnd_msg = nullptr;
>> > +  TRACE_LEAVE();
>> > +}
>> > +
>> >
>> +/************************************************************
>> > *****************
>> >   * Function: avd_nd_ncs_su_assigned
>> >   *
>> >   * Purpose:  This function is the handler for node director event
>> > when a diff --git a/src/amf/amfd/proc.h b/src/amf/amfd/proc.h
>> > --- a/src/amf/amfd/proc.h
>> > +++ b/src/amf/amfd/proc.h
>> > @@ -64,6 +64,7 @@ uint32_t avd_evt_queue_count(AVD_CL_CB *
>> uint32_t
>> > avd_count_sync_node_size(AVD_CL_CB *cb);  void
>> > avd_process_state_info_queue(AVD_CL_CB *cb);  void
>> > avd_node_up_evh(AVD_CL_CB *cb, struct AVD_EVT *evt);
>> > +void avd_node_down_evh(AVD_CL_CB *cb, struct AVD_EVT *evt);
>> >  void avd_reg_su_evh(AVD_CL_CB *cb, struct AVD_EVT *evt);  void
>> > avd_oper_req_evh(AVD_CL_CB *cb, struct AVD_EVT *evt);  void
>> > avd_mds_avnd_up_evh(AVD_CL_CB *cb, struct AVD_EVT *evt); diff --git
>> > a/src/amf/amfnd/avnd_defs.h b/src/amf/amfnd/avnd_defs.h
>> > --- a/src/amf/amfnd/avnd_defs.h
>> > +++ b/src/amf/amfnd/avnd_defs.h
>> > @@ -62,6 +62,8 @@
>> >  #define AVND_COMP_CBK_RESP_TIME       5000        /* time out callback
>> > response */
>> >  #define AVND_AVD_MSG_RESP_TIME   1000     /* time out AvD message
>> > response */
>> >
>> > +#define AVND_NODE_DOWN_MAX_RETRY  10 /* max retries on waiting
>> > for ack of node_down msg */
>> > +
>> >  #define m_AVND_STACKSIZE       NCS_STACKSIZE_HUGE
>> >
>> >  typedef enum {
>> > diff --git a/src/amf/amfnd/avnd_di.h b/src/amf/amfnd/avnd_di.h
>> > --- a/src/amf/amfnd/avnd_di.h
>> > +++ b/src/amf/amfnd/avnd_di.h
>> > @@ -82,6 +82,7 @@ void avnd_diq_rec_del(struct avnd_cb_tag  void
>> > avnd_diq_rec_send_buffered_msg(struct avnd_cb_tag *cb);  uint32_t
>> > avnd_diq_rec_send(struct avnd_cb_tag *cb, AVND_DND_MSG_LIST *rec);
>> > uint32_t avnd_di_reg_su_rsp_snd(struct avnd_cb_tag *cb, const
>> > std::string& su_name, uint32_t ret_code);
>> > +uint32_t avnd_di_node_down_msg_send(struct avnd_cb_tag *cb);
>> >  uint32_t avnd_di_ack_nack_msg_send(struct avnd_cb_tag *cb, uint32_t
>> > rcv_id, uint32_t view_num);  extern void avnd_di_uns32_upd_send(int
>> > class_id, int attr_id, const std::string& dn, uint32_t value);  extern
>> > uint32_t avnd_di_resend_pg_start_track(struct avnd_cb_tag *); diff
>> > --git a/src/amf/amfnd/avnd_mds.h b/src/amf/amfnd/avnd_mds.h
>> > --- a/src/amf/amfnd/avnd_mds.h
>> > +++ b/src/amf/amfnd/avnd_mds.h
>> > @@ -66,6 +66,7 @@ typedef struct avnd_dnd_msg_list_tag {
>> >    AVND_MSG msg;
>> >    AVND_TMR resp_tmr;
>> >    uint32_t opq_hdl;
>> > +  uint16_t no_retries;
>> >    struct avnd_dnd_msg_list_tag *next;
>> >  } AVND_DND_MSG_LIST;
>> >
>> > diff --git a/src/amf/amfnd/di.cc b/src/amf/amfnd/di.cc
>> > --- a/src/amf/amfnd/di.cc
>> > +++ b/src/amf/amfnd/di.cc
>> > @@ -430,17 +430,35 @@ uint32_t avnd_evt_tmr_rcv_msg_rsp_evh(AV
>> >    AVND_TMR_EVT *tmr = &evt->info.tmr;
>> >    AVND_DND_MSG_LIST *rec = 0;
>> >    uint32_t rc = NCSCC_RC_SUCCESS;
>> > -
>> > +  bool rec_tobe_deleted = false;
>> >    TRACE_ENTER();
>> >
>> >    /* retrieve the message record */
>> >    if ((0 == (rec = (AVND_DND_MSG_LIST
>> > *)ncshm_take_hdl(NCS_SERVICE_ID_AVND, tmr->opq_hdl))))
>> >            goto done;
>> >
>> > -  rc = avnd_diq_rec_send(cb, rec);
>> > +  /* Resend on time out if it's NODE_UP msg only */
>> > +  if (rec->msg.info.avd->msg_type == AVSV_N2D_NODE_UP_MSG) {
>> > +          rc = avnd_diq_rec_send(cb, rec);
>> > +  } else if (rec->msg.info.avd->msg_type ==
>> > AVSV_N2D_NODE_DOWN_MSG) {
>> > +          if (rec->no_retries < AVND_NODE_DOWN_MAX_RETRY) {
>> > +                  rc = avnd_diq_rec_send(cb, rec);
>> > +          } else {
>> > +                  LOG_WA("Node Down timer retries is over");
>> > +                  avnd_last_step_clean(cb);
>> > +                  rec_tobe_deleted = true;
>> > +          }
>> > +  } else {
>> > +          LOG_WA("Unexpected message response timeout with
>> > msg_type(%u)", rec->msg.info.avd->msg_type);
>> > +          rec_tobe_deleted = true;
>> > +  }
>> >
>> >    ncshm_give_hdl(tmr->opq_hdl);
>> >
>> > +  if (rec_tobe_deleted) {
>> > +          m_AVND_DIQ_REC_FIND_POP(cb, rec);
>> > +          avnd_diq_rec_del(cb, rec);
>> > +  }
>> >  done:
>> >    TRACE_LEAVE();
>> >    return rc;
>> > @@ -1159,6 +1177,39 @@ uint32_t avnd_di_reg_su_rsp_snd(AVND_CB  }
>> >
>> >
>> >
>> /*************************************************************
>> > ***************
>> > +  Name          : avnd_di_node_down_msg_send
>> > +
>> > +  Description   : This routine sends node_down message to active amf
>> > director.
>> > +
>> > +  Arguments     : cb  - ptr to the AvND control block
>> > +
>> > +  Return Values : NCSCC_RC_SUCCESS/NCSCC_RC_FAILURE
>> > +
>> > +  Notes         : None.
>> >
>> +*************************************************************
>> > *****************/
>> > +uint32_t avnd_di_node_down_msg_send(AVND_CB *cb) {
>> > +  AVND_MSG msg;
>> > +  uint32_t rc = NCSCC_RC_SUCCESS;
>> > +  TRACE_ENTER();
>> > +  memset(&msg, 0, sizeof(AVND_MSG));
>> > +  msg.info.avd = static_cast<AVSV_DND_MSG*>(calloc(1,
>> > sizeof(AVSV_DND_MSG)));
>> > +  msg.type = AVND_MSG_AVD;
>> > +  msg.info.avd->msg_type = AVSV_N2D_NODE_DOWN_MSG;
>> > +  msg.info.avd->msg_info.n2d_node_down_info.msg_id = ++(cb-
>> > >snd_msg_id);
>> > +  msg.info.avd->msg_info.n2d_node_down_info.node_id = cb-
>> > >node_info.nodeId;
>> > +  rc = avnd_di_msg_send(cb, &msg);
>> > +  if (rc == NCSCC_RC_SUCCESS) {
>> > +          msg.info.avd = 0;
>> > +  }
>> > +
>> > +  // free the contents of avnd message
>> > +  avnd_msg_content_free(cb, &msg);
>> > +  TRACE_LEAVE();
>> > +  return NCSCC_RC_SUCCESS;
>> > +}
>> > +
>> >
>> +/************************************************************
>> > ****************
>> >    Name          : avnd_di_msg_ack_process
>> >
>> >    Description   : This routine processes the the acks that are
>> generated by
>> > @@ -1179,6 +1230,12 @@ void avnd_di_msg_ack_process(AVND_CB *cb
>> >    /* find & pop the matching record */
>> >    m_AVND_DIQ_REC_FIND(cb, mid, rec);
>> >    if (rec) {
>> > +          if (rec->msg.info.avd->msg_type ==
>> > AVSV_N2D_NODE_DOWN_MSG) {
>> > +                  // first to stop timer to avoid processing timeout
>> > event
>> > +                  // then perform last step clean up
>> > +                  avnd_stop_tmr(cb, &rec->resp_tmr);
>> > +                  avnd_last_step_clean(cb);
>> > +          }
>> >            m_AVND_DIQ_REC_FIND_POP(cb, rec);
>> >            avnd_diq_rec_del(cb, rec);
>> >    }
>> > @@ -1240,6 +1297,7 @@ AVND_DND_MSG_LIST
>> *avnd_diq_rec_add(AVND
>> >    /* store the msg (transfer memory ownership) */
>> >    rec->msg.type = msg->type;
>> >    rec->msg.info.avd = msg->info.avd;
>> > +  rec->no_retries = 0;
>> >    msg->info.avd = 0;
>> >
>> >    /* push the record to the AvD msg list */ @@ -1402,8 +1460,12 @@
>> > uint32_t avnd_diq_rec_send(AVND_CB *cb,
>> >
>> >    /* start the msg response timer */
>> >    if (NCSCC_RC_SUCCESS == rc) {
>> > +          rec->no_retries++;
>> >            if (rec->msg.info.avd->msg_type ==
>> > AVSV_N2D_NODE_UP_MSG)
>> >                    m_AVND_TMR_MSG_RESP_START(cb, *rec, rc);
>> > +          if (rec->msg.info.avd->msg_type ==
>> > AVSV_N2D_NODE_DOWN_MSG) {
>> > +                  m_AVND_TMR_MSG_RESP_START(cb, *rec, rc);
>> > +          }
>> >            msg.info.avd = 0;
>> >    }
>> >
>> > diff --git a/src/amf/amfnd/mds.cc b/src/amf/amfnd/mds.cc
>> > --- a/src/amf/amfnd/mds.cc
>> > +++ b/src/amf/amfnd/mds.cc
>> > @@ -1417,7 +1417,7 @@ uint32_t avnd_mds_send(AVND_CB *cb, AVND
>> >    case AVND_MSG_AVD:
>> >            send_info->i_to_svc = NCSMDS_SVC_ID_AVD;
>> >            /* Don't send any messages if we are shutting down */
>> > -          if (m_AVND_IS_SHUTTING_DOWN(cb)) {
>> > +          if (m_AVND_IS_SHUTTING_DOWN(cb) && msg->info.avd-
>> > >msg_type != AVSV_N2D_NODE_DOWN_MSG) {
>> >                    TRACE_1("Shutting down, not sending msg to
>> AMFD.");
>> >                    goto done;
>> >            }
>> > diff --git a/src/amf/amfnd/susm.cc b/src/amf/amfnd/susm.cc
>> > --- a/src/amf/amfnd/susm.cc
>> > +++ b/src/amf/amfnd/susm.cc
>> > @@ -1102,7 +1102,11 @@ uint32_t avnd_su_si_oper_done(AVND_CB *c
>> >                            }
>> >                    } else {
>> >                            LOG_NO("Removed assignments from AMF
>> components");
>> > -                          avnd_last_step_clean(cb);
>> > +                          if (m_NCS_NODE_ID_FROM_MDS_DEST(cb-
>> > >active_avd_adest) != ncs_get_node_id()) {
>> > +                                  avnd_last_step_clean(cb);
>> > +                          } else {
>> > +                                  avnd_di_node_down_msg_send(cb);
>> > +                          }
>> >                    }
>> >            }
>> >    }
>> > @@ -1168,7 +1172,11 @@ uint32_t avnd_su_si_oper_done(AVND_CB *c
>> >                    }
>> >            } else {
>> >                    LOG_NO("Removed assignments from AMF
>> components");
>> > -                  avnd_last_step_clean(cb);
>> > +                  if (m_NCS_NODE_ID_FROM_MDS_DEST(cb-
>> > >active_avd_adest) != ncs_get_node_id()) {
>> > +                          avnd_last_step_clean(cb);
>> > +                  } else {
>> > +                          avnd_di_node_down_msg_send(cb);
>> > +                  }
>> >            }
>> >    }
>> >
>> > diff --git a/src/amf/amfnd/term.cc b/src/amf/amfnd/term.cc
>> > --- a/src/amf/amfnd/term.cc
>> > +++ b/src/amf/amfnd/term.cc
>> > @@ -175,8 +175,13 @@ uint32_t avnd_evt_last_step_term_evh(AVN
>> >    }
>> >
>> >  cleanup_components:
>> > -  if (!si_removed)
>> > -          avnd_last_step_clean(cb);
>> > +  if (!si_removed) {
>> > +          if (m_NCS_NODE_ID_FROM_MDS_DEST(cb-
>> > >active_avd_adest) != ncs_get_node_id()) {
>> > +                  avnd_last_step_clean(cb);
>> > +          } else {
>> > +                  avnd_di_node_down_msg_send(cb);
>> > +          }
>> > +  }
>> >  done:
>> >    TRACE_LEAVE();
>> >    return NCSCC_RC_SUCCESS;
>



------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to