src/amf/amfd/main.cc      |   2 +-
 src/amf/amfd/ndfsm.cc     |  61 +++++++++++++++++++++++++++++++++++++++++++
 src/amf/amfd/proc.h       |   1 +
 src/amf/amfnd/avnd_defs.h |   2 +
 src/amf/amfnd/avnd_di.h   |   1 +
 src/amf/amfnd/avnd_mds.h  |   1 +
 src/amf/amfnd/di.cc       |  66 +++++++++++++++++++++++++++++++++++++++++++++-
 src/amf/amfnd/mds.cc      |   2 +-
 src/amf/amfnd/susm.cc     |  12 +++++++-
 src/amf/amfnd/term.cc     |   9 ++++-
 10 files changed, 149 insertions(+), 8 deletions(-)


This patch changes how amfnd and amfd handle the node_down message.
Before amfnd enters component termination, it sends a node_down
message to amfd and starts a response timer.
In amfd, upon reception of the node_down message, amfd tries
to execute all of its pending jobs. A node ack is sent if amfd
finishes all of its jobs.
If the timer expires after the maximum number of retries
(AVND_NODE_DOWN_MAX_RETRY), or amfnd receives the node ack message for
NODE_DOWN_MSG, amfnd continues its component termination phase.

diff --git a/src/amf/amfd/main.cc b/src/amf/amfd/main.cc
--- a/src/amf/amfd/main.cc
+++ b/src/amf/amfd/main.cc
@@ -100,7 +100,7 @@ static const AVD_EVT_HDLR g_actv_list[AV
        avd_pg_trk_act_evh,      /* AVD_EVT_PG_TRACK_ACT_MSG */
        avd_oper_req_evh,        /* AVD_EVT_OPERATION_REQUEST_MSG */
        avd_data_update_req_evh, /* AVD_EVT_DATA_REQUEST_MSG */
-       invalid_evh,         /* AVD_EVT_NODE_DOWN_MSG */
+       avd_node_down_evh,         /* AVD_EVT_NODE_DOWN_MSG */
        avd_ack_nack_evh,            /* AVD_EVT_VERIFY_ACK_NACK_MSG */
        avd_comp_validation_evh, /* AVD_EVT_COMP_VALIDATION_MSG */
        avd_nd_sisu_state_info_evh,       /* AVD_EVT_ND_SISU_STATE_INFO_MSG */
diff --git a/src/amf/amfd/ndfsm.cc b/src/amf/amfd/ndfsm.cc
--- a/src/amf/amfd/ndfsm.cc
+++ b/src/amf/amfd/ndfsm.cc
@@ -531,6 +531,67 @@ done:
 }
 
 /*****************************************************************************
+ * Function: avd_node_down_evh
+ *
+ * Purpose:  Handler for the node down event, raised on arrival of a
+ * node_down message. AMFND sends this message when it is about to
+ * terminate OpenSAF SU(s) that provide services AMFD may still need.
+ * On reception AMFD runs its pending (IMM update) jobs so that no IMM
+ * data is lost, and acks the message once every job has been executed.
+ *
+ * Input: cb - the AVD control block
+ *        evt - The event information; evt->info.avnd_msg is freed here.
+ *
+ * Returns: None.
+ *
+ * NOTES: No ack is sent for a wrong message type, an unknown node ID or
+ * when a pending job is not executed. The node's receive message ID is
+ * set only if the incoming msg_id equals rcv_msg_id + 1.
+ **************************************************************************/
+void avd_node_down_evh(AVD_CL_CB *cb, AVD_EVT *evt)
+{
+       AVD_DND_MSG *n2d_msg = evt->info.avnd_msg; // message carried by the event
+       AVD_AVND *node = nullptr;
+
+       TRACE_ENTER2("from nodeId=0x%x", 
n2d_msg->msg_info.n2d_node_down_info.node_id);
+
+       if (evt->info.avnd_msg->msg_type != AVSV_N2D_NODE_DOWN_MSG) {
+               LOG_WA("%s: wrong message type (%u)", 
__FUNCTION__,evt->info.avnd_msg->msg_type);
+               goto done;
+       }
+
+       if ((node = 
avd_node_find_nodeid(n2d_msg->msg_info.n2d_node_down_info.node_id)) == nullptr) 
{
+               LOG_WA("%s: invalid node ID (%x)", __FUNCTION__, 
n2d_msg->msg_info.n2d_node_down_info.node_id);
+               goto done;
+       }
+
+       if ((node->rcv_msg_id + 1) == 
n2d_msg->msg_info.n2d_node_down_info.msg_id)
+               m_AVD_SET_AVND_RCV_ID(cb, node, 
(n2d_msg->msg_info.n2d_node_down_info.msg_id));
+
+       // try to execute all pending jobs, stopping at the first one that is not executed
+       AvdJobDequeueResultT ret = JOB_EXECUTED; // stays JOB_EXECUTED when the queue is already empty
+       while (Fifo::size() > 0) {
+               ret = Fifo::execute(cb);
+               if (ret != JOB_EXECUTED) {
+                       LOG_WA("AMFD has (%d) pending jobs not being executed", 
Fifo::size());
+                       break;
+               }
+       }
+       if (ret == JOB_EXECUTED) {
+               // send ack for node_down message to amfnd, so amfnd can 
continue termination phase
+               if (avd_snd_node_ack_msg(cb, node, 
n2d_msg->msg_info.n2d_node_down_info.msg_id) != NCSCC_RC_SUCCESS) {
+                       /* log error that the director is not able to send the 
message */
+                       LOG_ER("%s:%u: %u", __FILE__, __LINE__, 
node->node_info.nodeId);
+               }
+       }
+
+done:
+       avsv_dnd_msg_free(n2d_msg); // reached from every path above, so the message is always released
+       evt->info.avnd_msg = nullptr; // clear the event's pointer to the freed message
+       TRACE_LEAVE();
+}
+
+/*****************************************************************************
  * Function: avd_nd_ncs_su_assigned
  *
  * Purpose:  This function is the handler for node director event when a
diff --git a/src/amf/amfd/proc.h b/src/amf/amfd/proc.h
--- a/src/amf/amfd/proc.h
+++ b/src/amf/amfd/proc.h
@@ -64,6 +64,7 @@ uint32_t avd_evt_queue_count(AVD_CL_CB *
 uint32_t avd_count_sync_node_size(AVD_CL_CB *cb);
 void avd_process_state_info_queue(AVD_CL_CB *cb);
 void avd_node_up_evh(AVD_CL_CB *cb, struct AVD_EVT *evt);
+void avd_node_down_evh(AVD_CL_CB *cb, struct AVD_EVT *evt);
 void avd_reg_su_evh(AVD_CL_CB *cb, struct AVD_EVT *evt);
 void avd_oper_req_evh(AVD_CL_CB *cb, struct AVD_EVT *evt);
 void avd_mds_avnd_up_evh(AVD_CL_CB *cb, struct AVD_EVT *evt);
diff --git a/src/amf/amfnd/avnd_defs.h b/src/amf/amfnd/avnd_defs.h
--- a/src/amf/amfnd/avnd_defs.h
+++ b/src/amf/amfnd/avnd_defs.h
@@ -62,6 +62,8 @@
 #define AVND_COMP_CBK_RESP_TIME       5000     /* time out callback response */
 #define AVND_AVD_MSG_RESP_TIME   1000  /* time out AvD message response */
 
+#define AVND_NODE_DOWN_MAX_RETRY       10 /* max retries on waiting for ack of 
node_down msg */
+
 #define m_AVND_STACKSIZE       NCS_STACKSIZE_HUGE
 
 typedef enum {
diff --git a/src/amf/amfnd/avnd_di.h b/src/amf/amfnd/avnd_di.h
--- a/src/amf/amfnd/avnd_di.h
+++ b/src/amf/amfnd/avnd_di.h
@@ -82,6 +82,7 @@ void avnd_diq_rec_del(struct avnd_cb_tag
 void avnd_diq_rec_send_buffered_msg(struct avnd_cb_tag *cb);
 uint32_t avnd_diq_rec_send(struct avnd_cb_tag *cb, AVND_DND_MSG_LIST *rec);
 uint32_t avnd_di_reg_su_rsp_snd(struct avnd_cb_tag *cb, const std::string& 
su_name, uint32_t ret_code);
+uint32_t avnd_di_node_down_msg_send(struct avnd_cb_tag *cb);
 uint32_t avnd_di_ack_nack_msg_send(struct avnd_cb_tag *cb, uint32_t rcv_id, 
uint32_t view_num);
 extern void avnd_di_uns32_upd_send(int class_id, int attr_id, const 
std::string& dn, uint32_t value);
 extern uint32_t avnd_di_resend_pg_start_track(struct avnd_cb_tag *);
diff --git a/src/amf/amfnd/avnd_mds.h b/src/amf/amfnd/avnd_mds.h
--- a/src/amf/amfnd/avnd_mds.h
+++ b/src/amf/amfnd/avnd_mds.h
@@ -66,6 +66,7 @@ typedef struct avnd_dnd_msg_list_tag {
        AVND_MSG msg;
        AVND_TMR resp_tmr;
        uint32_t opq_hdl;
+       uint16_t no_retries;
        struct avnd_dnd_msg_list_tag *next;
 } AVND_DND_MSG_LIST;
 
diff --git a/src/amf/amfnd/di.cc b/src/amf/amfnd/di.cc
--- a/src/amf/amfnd/di.cc
+++ b/src/amf/amfnd/di.cc
@@ -430,17 +430,35 @@ uint32_t avnd_evt_tmr_rcv_msg_rsp_evh(AV
        AVND_TMR_EVT *tmr = &evt->info.tmr;
        AVND_DND_MSG_LIST *rec = 0;
        uint32_t rc = NCSCC_RC_SUCCESS;
-
+       bool rec_tobe_deleted = false;
        TRACE_ENTER();
 
        /* retrieve the message record */
        if ((0 == (rec = (AVND_DND_MSG_LIST 
*)ncshm_take_hdl(NCS_SERVICE_ID_AVND, tmr->opq_hdl))))
                goto done;
 
-       rc = avnd_diq_rec_send(cb, rec);
+       /* Resend on time out if it's NODE_UP msg only */
+       if (rec->msg.info.avd->msg_type == AVSV_N2D_NODE_UP_MSG) {
+               rc = avnd_diq_rec_send(cb, rec);
+       } else if (rec->msg.info.avd->msg_type == AVSV_N2D_NODE_DOWN_MSG) {
+               if (rec->no_retries < AVND_NODE_DOWN_MAX_RETRY) {
+                       rc = avnd_diq_rec_send(cb, rec);
+               } else {
+                       LOG_WA("Node Down timer retries is over");
+                       avnd_last_step_clean(cb);
+                       rec_tobe_deleted = true;
+               }
+       } else {
+               LOG_WA("Unexpected message response timeout with msg_type(%u)", 
rec->msg.info.avd->msg_type);
+               rec_tobe_deleted = true;
+       }
 
        ncshm_give_hdl(tmr->opq_hdl);
 
+       if (rec_tobe_deleted) {
+               m_AVND_DIQ_REC_FIND_POP(cb, rec);
+               avnd_diq_rec_del(cb, rec);
+       }
 done:
        TRACE_LEAVE();
        return rc;
@@ -1159,6 +1177,39 @@ uint32_t avnd_di_reg_su_rsp_snd(AVND_CB 
 }
 
 /****************************************************************************
+  Name          : avnd_di_node_down_msg_send
+
+  Description   : This routine sends node_down message to active amf director.
+                  The message is handed to avnd_di_msg_send().
+  Arguments     : cb  - ptr to the AvND control block
+
+  Return Values : NCSCC_RC_SUCCESS/NCSCC_RC_FAILURE (failure if the message
+                  cannot be allocated or avnd_di_msg_send() reports an error)
+  Notes         : cb->snd_msg_id is incremented only after the allocation.
+******************************************************************************/
+uint32_t avnd_di_node_down_msg_send(AVND_CB *cb)
+{
+       AVND_MSG msg;
+       uint32_t rc = NCSCC_RC_FAILURE; /* reported when the allocation below fails */
+       TRACE_ENTER();
+       memset(&msg, 0, sizeof(AVND_MSG));
+       msg.info.avd = static_cast<AVSV_DND_MSG*>(calloc(1, sizeof(AVSV_DND_MSG)));
+       if (msg.info.avd != nullptr) { /* never dereference a failed allocation */
+               msg.type = AVND_MSG_AVD;
+               msg.info.avd->msg_type = AVSV_N2D_NODE_DOWN_MSG;
+               msg.info.avd->msg_info.n2d_node_down_info.msg_id = ++(cb->snd_msg_id);
+               msg.info.avd->msg_info.n2d_node_down_info.node_id = cb->node_info.nodeId;
+               rc = avnd_di_msg_send(cb, &msg);
+               if (rc == NCSCC_RC_SUCCESS) {
+                       msg.info.avd = 0; /* keep the content from being freed below */
+               }
+               avnd_msg_content_free(cb, &msg); /* free the contents of avnd message */
+       }
+       TRACE_LEAVE();
+       return rc; /* propagate the outcome instead of always reporting success */
+}
+
+/****************************************************************************
   Name          : avnd_di_msg_ack_process
  
   Description   : This routine processes the the acks that are generated by
@@ -1179,6 +1230,12 @@ void avnd_di_msg_ack_process(AVND_CB *cb
        /* find & pop the matching record */
        m_AVND_DIQ_REC_FIND(cb, mid, rec);
        if (rec) {
+               if (rec->msg.info.avd->msg_type == AVSV_N2D_NODE_DOWN_MSG) {
+                       // first to stop timer to avoid processing timeout event
+                       // then perform last step clean up
+                       avnd_stop_tmr(cb, &rec->resp_tmr);
+                       avnd_last_step_clean(cb);
+               }
                m_AVND_DIQ_REC_FIND_POP(cb, rec);
                avnd_diq_rec_del(cb, rec);
        }
@@ -1240,6 +1297,7 @@ AVND_DND_MSG_LIST *avnd_diq_rec_add(AVND
        /* store the msg (transfer memory ownership) */
        rec->msg.type = msg->type;
        rec->msg.info.avd = msg->info.avd;
+       rec->no_retries = 0;
        msg->info.avd = 0;
 
        /* push the record to the AvD msg list */
@@ -1402,8 +1460,12 @@ uint32_t avnd_diq_rec_send(AVND_CB *cb, 
 
        /* start the msg response timer */
        if (NCSCC_RC_SUCCESS == rc) {
+               rec->no_retries++;
                if (rec->msg.info.avd->msg_type == AVSV_N2D_NODE_UP_MSG)
                        m_AVND_TMR_MSG_RESP_START(cb, *rec, rc);
+               if (rec->msg.info.avd->msg_type == AVSV_N2D_NODE_DOWN_MSG) {
+                       m_AVND_TMR_MSG_RESP_START(cb, *rec, rc);
+               }
                msg.info.avd = 0;
        }
 
diff --git a/src/amf/amfnd/mds.cc b/src/amf/amfnd/mds.cc
--- a/src/amf/amfnd/mds.cc
+++ b/src/amf/amfnd/mds.cc
@@ -1417,7 +1417,7 @@ uint32_t avnd_mds_send(AVND_CB *cb, AVND
        case AVND_MSG_AVD:
                send_info->i_to_svc = NCSMDS_SVC_ID_AVD;
                /* Don't send any messages if we are shutting down */
-               if (m_AVND_IS_SHUTTING_DOWN(cb)) {
+               if (m_AVND_IS_SHUTTING_DOWN(cb) && msg->info.avd->msg_type != 
AVSV_N2D_NODE_DOWN_MSG) {
                        TRACE_1("Shutting down, not sending msg to AMFD.");
                        goto done;
                }
diff --git a/src/amf/amfnd/susm.cc b/src/amf/amfnd/susm.cc
--- a/src/amf/amfnd/susm.cc
+++ b/src/amf/amfnd/susm.cc
@@ -1102,7 +1102,11 @@ uint32_t avnd_su_si_oper_done(AVND_CB *c
                                }
                        } else {
                                LOG_NO("Removed assignments from AMF 
components");
-                               avnd_last_step_clean(cb);
+                               if 
(m_NCS_NODE_ID_FROM_MDS_DEST(cb->active_avd_adest) != ncs_get_node_id()) {
+                                       avnd_last_step_clean(cb);
+                               } else {
+                                       avnd_di_node_down_msg_send(cb);
+                               }
                        }
                }
        }
@@ -1168,7 +1172,11 @@ uint32_t avnd_su_si_oper_done(AVND_CB *c
                        }
                } else {
                        LOG_NO("Removed assignments from AMF components");
-                       avnd_last_step_clean(cb);
+                       if (m_NCS_NODE_ID_FROM_MDS_DEST(cb->active_avd_adest) 
!= ncs_get_node_id()) {
+                               avnd_last_step_clean(cb);
+                       } else {
+                               avnd_di_node_down_msg_send(cb);
+                       }
                }
        }
 
diff --git a/src/amf/amfnd/term.cc b/src/amf/amfnd/term.cc
--- a/src/amf/amfnd/term.cc
+++ b/src/amf/amfnd/term.cc
@@ -175,8 +175,13 @@ uint32_t avnd_evt_last_step_term_evh(AVN
        }
 
 cleanup_components:
-       if (!si_removed)
-               avnd_last_step_clean(cb);
+       if (!si_removed) {
+               if (m_NCS_NODE_ID_FROM_MDS_DEST(cb->active_avd_adest) != 
ncs_get_node_id()) {
+                       avnd_last_step_clean(cb);
+               } else {
+                       avnd_di_node_down_msg_send(cb);
+               }
+       }
 done:
        TRACE_LEAVE();
        return NCSCC_RC_SUCCESS;

------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to