src/amf/amfd/main.cc | 2 +-
src/amf/amfd/ndfsm.cc | 61 +++++++++++++++++++++++++++++++++++++++++++
src/amf/amfd/proc.h | 1 +
src/amf/amfnd/avnd_defs.h | 2 +
src/amf/amfnd/avnd_di.h | 1 +
src/amf/amfnd/avnd_mds.h | 1 +
src/amf/amfnd/di.cc | 66 +++++++++++++++++++++++++++++++++++++++++++++-
src/amf/amfnd/mds.cc | 2 +-
src/amf/amfnd/susm.cc | 12 +++++++-
src/amf/amfnd/term.cc | 9 ++++-
10 files changed, 149 insertions(+), 8 deletions(-)
This patch defines how amfnd and amfd handle the node_down message.
Before amfnd enters component termination, it sends a node_down
message to amfd and starts a timer.
In amfd, upon reception of the node_down message, amfd tries
to execute all of its pending jobs. A node ack is sent if amfd
finishes all of its jobs.
If the timer expires or amfnd receives the node ack message for
NODE_DOWN_MSG, amfnd continues with its component termination
phase.
diff --git a/src/amf/amfd/main.cc b/src/amf/amfd/main.cc
--- a/src/amf/amfd/main.cc
+++ b/src/amf/amfd/main.cc
@@ -100,7 +100,7 @@ static const AVD_EVT_HDLR g_actv_list[AV
avd_pg_trk_act_evh, /* AVD_EVT_PG_TRACK_ACT_MSG */
avd_oper_req_evh, /* AVD_EVT_OPERATION_REQUEST_MSG */
avd_data_update_req_evh, /* AVD_EVT_DATA_REQUEST_MSG */
- invalid_evh, /* AVD_EVT_NODE_DOWN_MSG */
+ avd_node_down_evh, /* AVD_EVT_NODE_DOWN_MSG */
avd_ack_nack_evh, /* AVD_EVT_VERIFY_ACK_NACK_MSG */
avd_comp_validation_evh, /* AVD_EVT_COMP_VALIDATION_MSG */
avd_nd_sisu_state_info_evh, /* AVD_EVT_ND_SISU_STATE_INFO_MSG */
diff --git a/src/amf/amfd/ndfsm.cc b/src/amf/amfd/ndfsm.cc
--- a/src/amf/amfd/ndfsm.cc
+++ b/src/amf/amfd/ndfsm.cc
@@ -531,6 +531,67 @@ done:
}
/*****************************************************************************
+ * Function: avd_node_down_evh
+ *
+ * Purpose: This function is the handler for node down event indicating
+ * the arrival of the node_down message. AMFND sends this message when
+ * AMFND is going to terminate OpenSAF SU(s), who are providing services
+ * that AMFD may need. When AMFD receives this message, AMFD currently
+ * will execute all pending IMM update jobs to avoid a loss of IMM data
+ *
+ * Input: cb - the AVD control block
+ * evt - The event information.
+ *
+ * Returns: None.
+ *
+ * NOTES:
+ *
+ *
+ **************************************************************************/
+void avd_node_down_evh(AVD_CL_CB *cb, AVD_EVT *evt)
+{
+ AVD_DND_MSG *n2d_msg = evt->info.avnd_msg;
+ AVD_AVND *node = nullptr;
+ AvdJobDequeueResultT ret = JOB_EXECUTED; // declared before the first goto so that no jump to done: bypasses its initialization
+ TRACE_ENTER2("from nodeId=0x%x", n2d_msg->msg_info.n2d_node_down_info.node_id);
+ // this handler only accepts node_down messages
+ if (evt->info.avnd_msg->msg_type != AVSV_N2D_NODE_DOWN_MSG) {
+   LOG_WA("%s: wrong message type (%u)", __FUNCTION__, evt->info.avnd_msg->msg_type);
+   goto done;
+ }
+ // the node ID carried in the message must resolve to a node
+ if ((node = avd_node_find_nodeid(n2d_msg->msg_info.n2d_node_down_info.node_id)) == nullptr) {
+   LOG_WA("%s: invalid node ID (%x)", __FUNCTION__, n2d_msg->msg_info.n2d_node_down_info.node_id);
+   goto done;
+ }
+ // record the message id only when it is exactly one past the last id received from this node
+ if ((node->rcv_msg_id + 1) == n2d_msg->msg_info.n2d_node_down_info.msg_id)
+   m_AVD_SET_AVND_RCV_ID(cb, node, (n2d_msg->msg_info.n2d_node_down_info.msg_id));
+
+ // try to execute all pending jobs
+ // stop at the first job that is not executed; ret keeps that result
+ while (Fifo::size() > 0) {
+   ret = Fifo::execute(cb);
+   if (ret != JOB_EXECUTED) {
+     LOG_WA("AMFD has (%d) pending jobs not being executed", Fifo::size());
+     break;
+   }
+ }
+ if (ret == JOB_EXECUTED) {
+   // send ack for node_down message to amfnd, so amfnd can continue termination phase
+   if (avd_snd_node_ack_msg(cb, node, n2d_msg->msg_info.n2d_node_down_info.msg_id) != NCSCC_RC_SUCCESS) {
+     /* log error that the director is not able to send the message */
+     LOG_ER("%s:%u: %u", __FILE__, __LINE__, node->node_info.nodeId);
+   }
+ }
+ // common exit: every path frees the message and clears the event's pointer to it
+done:
+ avsv_dnd_msg_free(n2d_msg);
+ evt->info.avnd_msg = nullptr;
+ TRACE_LEAVE();
+}
+
+/*****************************************************************************
* Function: avd_nd_ncs_su_assigned
*
* Purpose: This function is the handler for node director event when a
diff --git a/src/amf/amfd/proc.h b/src/amf/amfd/proc.h
--- a/src/amf/amfd/proc.h
+++ b/src/amf/amfd/proc.h
@@ -64,6 +64,7 @@ uint32_t avd_evt_queue_count(AVD_CL_CB *
uint32_t avd_count_sync_node_size(AVD_CL_CB *cb);
void avd_process_state_info_queue(AVD_CL_CB *cb);
void avd_node_up_evh(AVD_CL_CB *cb, struct AVD_EVT *evt);
+void avd_node_down_evh(AVD_CL_CB *cb, struct AVD_EVT *evt);
void avd_reg_su_evh(AVD_CL_CB *cb, struct AVD_EVT *evt);
void avd_oper_req_evh(AVD_CL_CB *cb, struct AVD_EVT *evt);
void avd_mds_avnd_up_evh(AVD_CL_CB *cb, struct AVD_EVT *evt);
diff --git a/src/amf/amfnd/avnd_defs.h b/src/amf/amfnd/avnd_defs.h
--- a/src/amf/amfnd/avnd_defs.h
+++ b/src/amf/amfnd/avnd_defs.h
@@ -62,6 +62,8 @@
#define AVND_COMP_CBK_RESP_TIME 5000 /* time out callback response */
#define AVND_AVD_MSG_RESP_TIME 1000 /* time out AvD message response */
+#define AVND_NODE_DOWN_MAX_RETRY 10 /* max retries on waiting for ack of
node_down msg */
+
#define m_AVND_STACKSIZE NCS_STACKSIZE_HUGE
typedef enum {
diff --git a/src/amf/amfnd/avnd_di.h b/src/amf/amfnd/avnd_di.h
--- a/src/amf/amfnd/avnd_di.h
+++ b/src/amf/amfnd/avnd_di.h
@@ -82,6 +82,7 @@ void avnd_diq_rec_del(struct avnd_cb_tag
void avnd_diq_rec_send_buffered_msg(struct avnd_cb_tag *cb);
uint32_t avnd_diq_rec_send(struct avnd_cb_tag *cb, AVND_DND_MSG_LIST *rec);
uint32_t avnd_di_reg_su_rsp_snd(struct avnd_cb_tag *cb, const std::string&
su_name, uint32_t ret_code);
+uint32_t avnd_di_node_down_msg_send(struct avnd_cb_tag *cb);
uint32_t avnd_di_ack_nack_msg_send(struct avnd_cb_tag *cb, uint32_t rcv_id,
uint32_t view_num);
extern void avnd_di_uns32_upd_send(int class_id, int attr_id, const
std::string& dn, uint32_t value);
extern uint32_t avnd_di_resend_pg_start_track(struct avnd_cb_tag *);
diff --git a/src/amf/amfnd/avnd_mds.h b/src/amf/amfnd/avnd_mds.h
--- a/src/amf/amfnd/avnd_mds.h
+++ b/src/amf/amfnd/avnd_mds.h
@@ -66,6 +66,7 @@ typedef struct avnd_dnd_msg_list_tag {
AVND_MSG msg;
AVND_TMR resp_tmr;
uint32_t opq_hdl;
+ uint16_t no_retries;
struct avnd_dnd_msg_list_tag *next;
} AVND_DND_MSG_LIST;
diff --git a/src/amf/amfnd/di.cc b/src/amf/amfnd/di.cc
--- a/src/amf/amfnd/di.cc
+++ b/src/amf/amfnd/di.cc
@@ -430,17 +430,35 @@ uint32_t avnd_evt_tmr_rcv_msg_rsp_evh(AV
AVND_TMR_EVT *tmr = &evt->info.tmr;
AVND_DND_MSG_LIST *rec = 0;
uint32_t rc = NCSCC_RC_SUCCESS;
-
+ bool rec_tobe_deleted = false;
TRACE_ENTER();
/* retrieve the message record */
if ((0 == (rec = (AVND_DND_MSG_LIST
*)ncshm_take_hdl(NCS_SERVICE_ID_AVND, tmr->opq_hdl))))
goto done;
- rc = avnd_diq_rec_send(cb, rec);
+ /* Resend on time out if it's NODE_UP msg only */
+ if (rec->msg.info.avd->msg_type == AVSV_N2D_NODE_UP_MSG) {
+ rc = avnd_diq_rec_send(cb, rec);
+ } else if (rec->msg.info.avd->msg_type == AVSV_N2D_NODE_DOWN_MSG) {
+ if (rec->no_retries < AVND_NODE_DOWN_MAX_RETRY) {
+ rc = avnd_diq_rec_send(cb, rec);
+ } else {
+ LOG_WA("Node Down timer retries is over");
+ avnd_last_step_clean(cb);
+ rec_tobe_deleted = true;
+ }
+ } else {
+ LOG_WA("Unexpected message response timeout with msg_type(%u)",
rec->msg.info.avd->msg_type);
+ rec_tobe_deleted = true;
+ }
ncshm_give_hdl(tmr->opq_hdl);
+ if (rec_tobe_deleted) {
+ m_AVND_DIQ_REC_FIND_POP(cb, rec);
+ avnd_diq_rec_del(cb, rec);
+ }
done:
TRACE_LEAVE();
return rc;
@@ -1159,6 +1177,39 @@ uint32_t avnd_di_reg_su_rsp_snd(AVND_CB
}
/****************************************************************************
+ Name : avnd_di_node_down_msg_send
+
+ Description : This routine sends node_down message to active amf director.
+
+ Arguments : cb - ptr to the AvND control block
+
+ Return Values : NCSCC_RC_SUCCESS/NCSCC_RC_FAILURE
+
+ Notes : None.
+******************************************************************************/
+uint32_t avnd_di_node_down_msg_send(AVND_CB *cb)
+{
+ AVND_MSG msg;
+ uint32_t rc = NCSCC_RC_FAILURE; // returned unchanged when the payload cannot be allocated
+ TRACE_ENTER();
+ memset(&msg, 0, sizeof(AVND_MSG));
+ msg.type = AVND_MSG_AVD;
+ msg.info.avd = static_cast<AVSV_DND_MSG*>(calloc(1, sizeof(AVSV_DND_MSG)));
+ if (msg.info.avd == 0) {
+   LOG_WA("%s: calloc failed, node_down message not sent", __FUNCTION__);
+ } else {
+   msg.info.avd->msg_type = AVSV_N2D_NODE_DOWN_MSG;
+   msg.info.avd->msg_info.n2d_node_down_info.msg_id = ++(cb->snd_msg_id);
+   msg.info.avd->msg_info.n2d_node_down_info.node_id = cb->node_info.nodeId;
+   rc = avnd_di_msg_send(cb, &msg);
+   if (rc == NCSCC_RC_SUCCESS) msg.info.avd = 0; // sent: detach the payload so it is not freed below
+ }
+ avnd_msg_content_free(cb, &msg); // free the contents of avnd message (whatever is still attached)
+ TRACE_LEAVE();
+ return rc; // propagate the send result instead of always reporting success
+}
+
+/****************************************************************************
Name : avnd_di_msg_ack_process
Description : This routine processes the the acks that are generated by
@@ -1179,6 +1230,12 @@ void avnd_di_msg_ack_process(AVND_CB *cb
/* find & pop the matching record */
m_AVND_DIQ_REC_FIND(cb, mid, rec);
if (rec) {
+ if (rec->msg.info.avd->msg_type == AVSV_N2D_NODE_DOWN_MSG) {
+ // first to stop timer to avoid processing timeout event
+ // then perform last step clean up
+ avnd_stop_tmr(cb, &rec->resp_tmr);
+ avnd_last_step_clean(cb);
+ }
m_AVND_DIQ_REC_FIND_POP(cb, rec);
avnd_diq_rec_del(cb, rec);
}
@@ -1240,6 +1297,7 @@ AVND_DND_MSG_LIST *avnd_diq_rec_add(AVND
/* store the msg (transfer memory ownership) */
rec->msg.type = msg->type;
rec->msg.info.avd = msg->info.avd;
+ rec->no_retries = 0;
msg->info.avd = 0;
/* push the record to the AvD msg list */
@@ -1402,8 +1460,12 @@ uint32_t avnd_diq_rec_send(AVND_CB *cb,
/* start the msg response timer */
if (NCSCC_RC_SUCCESS == rc) {
+ rec->no_retries++;
if (rec->msg.info.avd->msg_type == AVSV_N2D_NODE_UP_MSG)
m_AVND_TMR_MSG_RESP_START(cb, *rec, rc);
+ if (rec->msg.info.avd->msg_type == AVSV_N2D_NODE_DOWN_MSG) {
+ m_AVND_TMR_MSG_RESP_START(cb, *rec, rc);
+ }
msg.info.avd = 0;
}
diff --git a/src/amf/amfnd/mds.cc b/src/amf/amfnd/mds.cc
--- a/src/amf/amfnd/mds.cc
+++ b/src/amf/amfnd/mds.cc
@@ -1417,7 +1417,7 @@ uint32_t avnd_mds_send(AVND_CB *cb, AVND
case AVND_MSG_AVD:
send_info->i_to_svc = NCSMDS_SVC_ID_AVD;
/* Don't send any messages if we are shutting down */
- if (m_AVND_IS_SHUTTING_DOWN(cb)) {
+ if (m_AVND_IS_SHUTTING_DOWN(cb) && msg->info.avd->msg_type !=
AVSV_N2D_NODE_DOWN_MSG) {
TRACE_1("Shutting down, not sending msg to AMFD.");
goto done;
}
diff --git a/src/amf/amfnd/susm.cc b/src/amf/amfnd/susm.cc
--- a/src/amf/amfnd/susm.cc
+++ b/src/amf/amfnd/susm.cc
@@ -1102,7 +1102,11 @@ uint32_t avnd_su_si_oper_done(AVND_CB *c
}
} else {
LOG_NO("Removed assignments from AMF
components");
- avnd_last_step_clean(cb);
+ if
(m_NCS_NODE_ID_FROM_MDS_DEST(cb->active_avd_adest) != ncs_get_node_id()) {
+ avnd_last_step_clean(cb);
+ } else {
+ avnd_di_node_down_msg_send(cb);
+ }
}
}
}
@@ -1168,7 +1172,11 @@ uint32_t avnd_su_si_oper_done(AVND_CB *c
}
} else {
LOG_NO("Removed assignments from AMF components");
- avnd_last_step_clean(cb);
+ if (m_NCS_NODE_ID_FROM_MDS_DEST(cb->active_avd_adest)
!= ncs_get_node_id()) {
+ avnd_last_step_clean(cb);
+ } else {
+ avnd_di_node_down_msg_send(cb);
+ }
}
}
diff --git a/src/amf/amfnd/term.cc b/src/amf/amfnd/term.cc
--- a/src/amf/amfnd/term.cc
+++ b/src/amf/amfnd/term.cc
@@ -175,8 +175,13 @@ uint32_t avnd_evt_last_step_term_evh(AVN
}
cleanup_components:
- if (!si_removed)
- avnd_last_step_clean(cb);
+ if (!si_removed) {
+ if (m_NCS_NODE_ID_FROM_MDS_DEST(cb->active_avd_adest) !=
ncs_get_node_id()) {
+ avnd_last_step_clean(cb);
+ } else {
+ avnd_di_node_down_msg_send(cb);
+ }
+ }
done:
TRACE_LEAVE();
return NCSCC_RC_SUCCESS;
------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel