Hi Minh,
I am using 1725_pending_review.tgz
(1725_02_V2_bugfix_01_resend_buffer_in_set_leds.diff,
1725_02_V2_bugfix_02_honor_clusterinit_nodesync_timer.diff,
1725_02_V2_bugfix_03_restore_ng_admin.diff,
1725_03_V4_failover_absent_susi_longDn.diff,
1725_04_V2_headless_validation.diff, 1725_05_V2_resend_oper_state.diff,
1725_06a_fullscope_escalation_headless.diff).
I am doing basic node reboot validation testing with no faults.
Configuration: SU1(act) and SU2(standby) both on PL-3.
TC #1: Start SC-1, PL-3 and PL-5: Unlock SU1 and SU2. Stop SC-1 and stop PL-3,
start PL-3 and start SC-1.
After SC-1 and PL-3 come back, ideally SU1 and SU2 should get assignments as
Act and Std, but no assignments are being given to the SUs on PL-3 and the
status shows the following:
Only SU2 has a Std assignment.
safSISU=safSu=SC-1\,safSg=NoRed\,safApp=OpenSAF,safSi=NoRed1,safApp=OpenSAF
saAmfSISUHAState=ACTIVE(1)
safSISU=safSu=PL-5\,safSg=NoRed\,safApp=OpenSAF,safSi=NoRed2,safApp=OpenSAF
saAmfSISUHAState=ACTIVE(1)
safSISU=safSu=SU2\,safSg=AmfDemo_2N\,safApp=AmfDemo1,safSi=AmfDemo1,safApp=AmfDemo1
saAmfSISUHAState=STANDBY(2)
safSISU=safSu=SC-1\,safSg=2N\,safApp=OpenSAF,safSi=SC-2N,safApp=OpenSAF
saAmfSISUHAState=ACTIVE(1)
safSISU=safSu=PL-3\,safSg=NoRed\,safApp=OpenSAF,safSi=NoRed3,safApp=OpenSAF
saAmfSISUHAState=ACTIVE(1)
TC #2: Configuration same as TC#1. Stop PL-3 and don't start. The same issue:
safSISU=safSu=PL-5\,safSg=NoRed\,safApp=OpenSAF,safSi=NoRed3,safApp=OpenSAF
saAmfSISUHAState=ACTIVE(1)
safSISU=safSu=SU2\,safSg=AmfDemo_2N\,safApp=AmfDemo1,safSi=AmfDemo1,safApp=AmfDemo1
saAmfSISUHAState=STANDBY(2)
safSISU=safSu=SC-1\,safSg=NoRed\,safApp=OpenSAF,safSi=NoRed2,safApp=OpenSAF
saAmfSISUHAState=ACTIVE(1)
safSISU=safSu=SC-1\,safSg=2N\,safApp=OpenSAF,safSi=SC-2N,safApp=OpenSAF
saAmfSISUHAState=ACTIVE(1)
TC #3: Configured SU1(Act) on PL-3 and SU2(Std) on PL-4.
Stop SC-1, stop PL-3 and PL-4, but PL-5 is running. Start SC-1: the same issue.
TC #4: Same as TC #3, but SU3 configured on PL-5 as spare. SU3 doesn't get any
assignment and Sg is unstable.
Thanks
-Nagu
> -----Original Message-----
> From: Minh Hon Chau [mailto:[email protected]]
> Sent: 18 August 2016 05:46
> To: [email protected]; Nagendra Kumar; Praveen Malviya;
> [email protected]; [email protected];
> [email protected]
> Cc: [email protected]
> Subject: [PATCH 2 of 4] AMFND: Admin operation continuation if csi
> completes during headless [#1725 part 1] V1
>
> osaf/services/saf/amf/amfnd/di.cc | 199
> +++++++++++++++++--------
> osaf/services/saf/amf/amfnd/include/avnd_di.h | 1 +
> 2 files changed, 134 insertions(+), 66 deletions(-)
>
>
> There are basically two options by which AMFD can continue an admin
> operation with completed csi(s)
>
> First: AMFD can use the sync SUSI fsm state as latest; AMFD then has to
> explore its SUSI assignments with adminStates of relevant entities to
> determine for which SU susi_success() should be called. A deeper level of
> exploration is needed for csi addition. It also depends on the SG Fsm state,
> which is used differently in different SG types.
>
> Second: AMFD uses the SUSI fsm state read from IMM as latest, and AMFND
> needs to resend susi_resp messages which were deferred during headless so
> that AMFD can continue the admin operation sequence. Both cases of csi
> completion [during or after] headless can run in the same code flow.
>
> The patch buffers susi_resp_msg during the headless stage and resends it to
> AMFD after headless. There could be a chance that AMFND sent out susi
> response message but AMFD could not receive or process it. This case could
> be seen as a defect, which can be fixed by securing the result of sending
> susi_resp message from AMFND toward AMFD.
>
> diff --git a/osaf/services/saf/amf/amfnd/di.cc
> b/osaf/services/saf/amf/amfnd/di.cc
> --- a/osaf/services/saf/amf/amfnd/di.cc
> +++ b/osaf/services/saf/amf/amfnd/di.cc
> @@ -805,11 +805,6 @@ uint32_t avnd_di_susi_resp_send(AVND_CB
> if (cb->term_state ==
> AVND_TERM_STATE_OPENSAF_SHUTDOWN_STARTED)
> return rc;
>
> - if (cb->is_avd_down == true) {
> - m_AVND_SU_ALL_SI_RESET(su);
> - return rc;
> - }
> -
> // should be in assignment pending state to be here
> osafassert(m_AVND_SU_IS_ASSIGN_PEND(su));
>
> @@ -820,64 +815,76 @@ uint32_t avnd_di_susi_resp_send(AVND_CB
> TRACE_ENTER2("Sending Resp su=%s, si=%s, curr_state=%u,
> prv_state=%u", su->name.value, curr_si->name.value,curr_si-
> >curr_state,curr_si->prv_state);
> /* populate the susi resp msg */
> msg.info.avd = new AVSV_DND_MSG();
> - msg.type = AVND_MSG_AVD;
> - msg.info.avd->msg_type = AVSV_N2D_INFO_SU_SI_ASSIGN_MSG;
> - msg.info.avd->msg_info.n2d_su_si_assign.msg_id = ++(cb-
> >snd_msg_id);
> - msg.info.avd->msg_info.n2d_su_si_assign.node_id = cb-
> >node_info.nodeId;
> - if (si) {
> - msg.info.avd->msg_info.n2d_su_si_assign.single_csi =
> - ((si->single_csi_add_rem_in_si ==
> AVSV_SUSI_ACT_BASE) ?
> false : true);
> - }
> - TRACE("curr_assign_state '%u'", curr_si->curr_assign_state);
> - msg.info.avd->msg_info.n2d_su_si_assign.msg_act =
> - (m_AVND_SU_SI_CURR_ASSIGN_STATE_IS_ASSIGNED(curr_si) ||
> - m_AVND_SU_SI_CURR_ASSIGN_STATE_IS_ASSIGNING(curr_si)) ?
> - ((!curr_si->prv_state) ? AVSV_SUSI_ACT_ASGN :
> AVSV_SUSI_ACT_MOD) : AVSV_SUSI_ACT_DEL;
> - msg.info.avd->msg_info.n2d_su_si_assign.su_name = su->name;
> - if (si) {
> - msg.info.avd->msg_info.n2d_su_si_assign.si_name = si->name;
> - if (AVSV_SUSI_ACT_ASGN == si->single_csi_add_rem_in_si) {
> - TRACE("si->curr_assign_state '%u'", curr_si-
> >curr_assign_state);
> - msg.info.avd->msg_info.n2d_su_si_assign.msg_act =
> -
> (m_AVND_SU_SI_CURR_ASSIGN_STATE_IS_ASSIGNED(curr_si) ||
> -
> m_AVND_SU_SI_CURR_ASSIGN_STATE_IS_ASSIGNING(curr_si)) ?
> - AVSV_SUSI_ACT_ASGN : AVSV_SUSI_ACT_DEL;
> - }
> - }
> - msg.info.avd->msg_info.n2d_su_si_assign.ha_state =
> - (SA_AMF_HA_QUIESCING == curr_si->curr_state) ?
> SA_AMF_HA_QUIESCED : curr_si->curr_state;
> - msg.info.avd->msg_info.n2d_su_si_assign.error =
> - (m_AVND_SU_SI_CURR_ASSIGN_STATE_IS_ASSIGNED(curr_si) ||
> - m_AVND_SU_SI_CURR_ASSIGN_STATE_IS_REMOVED(curr_si)) ?
> NCSCC_RC_SUCCESS : NCSCC_RC_FAILURE;
> + msg.type = AVND_MSG_AVD;
> + msg.info.avd->msg_type = AVSV_N2D_INFO_SU_SI_ASSIGN_MSG;
> + msg.info.avd->msg_info.n2d_su_si_assign.node_id = cb-
> >node_info.nodeId;
> + if (si) {
> + msg.info.avd->msg_info.n2d_su_si_assign.single_csi =
> + ((si->single_csi_add_rem_in_si ==
> AVSV_SUSI_ACT_BASE) ? false : true);
> + }
> + TRACE("curr_assign_state '%u'", curr_si->curr_assign_state);
> + msg.info.avd->msg_info.n2d_su_si_assign.msg_act =
> +
> (m_AVND_SU_SI_CURR_ASSIGN_STATE_IS_ASSIGNED(curr_si) ||
> +
> m_AVND_SU_SI_CURR_ASSIGN_STATE_IS_ASSIGNING(curr_si)) ?
> + ((!curr_si->prv_state) ?
> AVSV_SUSI_ACT_ASGN : AVSV_SUSI_ACT_MOD) : AVSV_SUSI_ACT_DEL;
> + msg.info.avd->msg_info.n2d_su_si_assign.su_name = su->name;
> + if (si) {
> + msg.info.avd->msg_info.n2d_su_si_assign.si_name = si-
> >name;
> + if (AVSV_SUSI_ACT_ASGN == si->single_csi_add_rem_in_si) {
> + TRACE("si->curr_assign_state '%u'", curr_si-
> >curr_assign_state);
> + msg.info.avd-
> >msg_info.n2d_su_si_assign.msg_act =
> +
> (m_AVND_SU_SI_CURR_ASSIGN_STATE_IS_ASSIGNED(curr_si) ||
> +
> m_AVND_SU_SI_CURR_ASSIGN_STATE_IS_ASSIGNING(curr_si)) ?
> + AVSV_SUSI_ACT_ASGN :
> AVSV_SUSI_ACT_DEL;
> + }
> + }
> + msg.info.avd->msg_info.n2d_su_si_assign.ha_state =
> + (SA_AMF_HA_QUIESCING == curr_si->curr_state) ?
> SA_AMF_HA_QUIESCED : curr_si->curr_state;
> + msg.info.avd->msg_info.n2d_su_si_assign.error =
> +
> (m_AVND_SU_SI_CURR_ASSIGN_STATE_IS_ASSIGNED(curr_si) ||
> +
> m_AVND_SU_SI_CURR_ASSIGN_STATE_IS_REMOVED(curr_si)) ?
> +NCSCC_RC_SUCCESS : NCSCC_RC_FAILURE;
>
> - if (msg.info.avd->msg_info.n2d_su_si_assign.msg_act ==
> AVSV_SUSI_ACT_ASGN)
> - osafassert(si);
> + if (msg.info.avd->msg_info.n2d_su_si_assign.msg_act ==
> AVSV_SUSI_ACT_ASGN)
> + osafassert(si);
>
> - /* send the msg to AvD */
> - TRACE("Sending. msg_id'%u', node_id'%u', msg_act'%u', su'%s', si'%s',
> ha_state'%u', error'%u', single_csi'%u'",
> - msg.info.avd->msg_info.n2d_su_si_assign.msg_id, msg.info.avd-
> >msg_info.n2d_su_si_assign.node_id,
> - msg.info.avd->msg_info.n2d_su_si_assign.msg_act, msg.info.avd-
> >msg_info.n2d_su_si_assign.su_name.value,
> - msg.info.avd->msg_info.n2d_su_si_assign.si_name.value,
> msg.info.avd->msg_info.n2d_su_si_assign.ha_state,
> - msg.info.avd->msg_info.n2d_su_si_assign.error, msg.info.avd-
> >msg_info.n2d_su_si_assign.single_csi);
> + /* send the msg to AvD */
> + TRACE("Sending. msg_id'%u', node_id'%u', msg_act'%u', su'%s',
> si'%s', ha_state'%u', error'%u', single_csi'%u'",
> + msg.info.avd->msg_info.n2d_su_si_assign.msg_id,
> msg.info.avd->msg_info.n2d_su_si_assign.node_id,
> + msg.info.avd->msg_info.n2d_su_si_assign.msg_act,
> msg.info.avd->msg_info.n2d_su_si_assign.su_name.value,
> + msg.info.avd->msg_info.n2d_su_si_assign.si_name.value,
> msg.info.avd->msg_info.n2d_su_si_assign.ha_state,
> + msg.info.avd->msg_info.n2d_su_si_assign.error,
> +msg.info.avd->msg_info.n2d_su_si_assign.single_csi);
>
> - if ((su->si_list.n_nodes > 1) && (si == nullptr)) {
> - if (msg.info.avd->msg_info.n2d_su_si_assign.msg_act ==
> AVSV_SUSI_ACT_DEL)
> - LOG_NO("Removed 'all SIs' from '%s'",
> su->name.value);
> + if ((su->si_list.n_nodes > 1) && (si == nullptr)) {
> + if (msg.info.avd->msg_info.n2d_su_si_assign.msg_act ==
> AVSV_SUSI_ACT_DEL)
> + LOG_NO("Removed 'all SIs' from '%s'", su-
> >name.value);
>
> - if (msg.info.avd->msg_info.n2d_su_si_assign.msg_act ==
> AVSV_SUSI_ACT_MOD)
> - LOG_NO("Assigned 'all SIs' %s of '%s'",
> - ha_state[msg.info.avd-
> >msg_info.n2d_su_si_assign.ha_state],
> - su->name.value);
> - }
> + if (msg.info.avd->msg_info.n2d_su_si_assign.msg_act ==
> AVSV_SUSI_ACT_MOD)
> + LOG_NO("Assigned 'all SIs' %s of '%s'",
> + ha_state[msg.info.avd-
> >msg_info.n2d_su_si_assign.ha_state],
> + su->name.value);
> + }
>
> - rc = avnd_di_msg_send(cb, &msg);
> - if (NCSCC_RC_SUCCESS == rc)
> - msg.info.avd = 0;
> -
> - /* we have completed the SU SI msg processing */
> - if (su_assign_state_is_stable(su))
> - m_AVND_SU_ASSIGN_PEND_RESET(su);
> - m_AVND_SU_ALL_SI_RESET(su);
> + if (cb->is_avd_down == true) {
> + // We are in headless, buffer this msg
> + msg.info.avd->msg_info.n2d_su_si_assign.msg_id = 0;
> + if (avnd_diq_rec_add(cb, &msg) == nullptr) {
> + rc = NCSCC_RC_FAILURE;
> + }
> + m_AVND_SU_ALL_SI_RESET(su);
> + LOG_NO("avnd_di_susi_resp_send() deferred as AMF
> director is offline");
> + } else {
> + // We are in normal cluster, send msg to director
> + msg.info.avd->msg_info.n2d_su_si_assign.msg_id = ++(cb-
> >snd_msg_id);
> + /* send the msg to AvD */
> + rc = avnd_di_msg_send(cb, &msg);
> + if (NCSCC_RC_SUCCESS == rc)
> + msg.info.avd = 0;
> + /* we have completed the SU SI msg processing */
> + if (su_assign_state_is_stable(su)) {
> + m_AVND_SU_ASSIGN_PEND_RESET(su);
> + }
> + m_AVND_SU_ALL_SI_RESET(su);
> + }
>
> /* free the contents of avnd message */
> avnd_msg_content_free(cb, &msg);
> @@ -1256,14 +1263,7 @@ void avnd_diq_rec_del(AVND_CB *cb, AVND_
> /* stop the AvD msg response timer */
> if (m_AVND_TMR_IS_ACTIVE(rec->resp_tmr)) {
> m_AVND_TMR_MSG_RESP_STOP(cb, *rec);
> - // Resend msgs from queue because amfd dropped during
> sync
> - if ((cb->dnd_list.head != nullptr)) {
> - TRACE("retransmit message to amfd");
> - AVND_DND_MSG_LIST *pending_rec = 0;
> - for (pending_rec = cb->dnd_list.head; pending_rec !=
> nullptr; pending_rec = pending_rec->next) {
> - avnd_diq_rec_send(cb, pending_rec);
> - }
> - }
> + avnd_diq_rec_send_buffered_msg(cb);
> /* resend pg start track */
> avnd_di_resend_pg_start_track(cb);
> }
> @@ -1276,6 +1276,73 @@ void avnd_diq_rec_del(AVND_CB *cb, AVND_
> TRACE_LEAVE();
> return;
> }
> +/************************************************************
> ****************
> + Name : avnd_diq_rec_send_buffered_msg
> +
> + Description : Resend buffered msg
> +
> + Arguments : cb - ptr to the AvND control block
> +
> + Return Values : None.
> +
> + Notes : None.
> +*************************************************************
> **********
> +*******/ void avnd_diq_rec_send_buffered_msg(AVND_CB *cb) {
> + TRACE_ENTER();
> + // Resend msgs from queue because amfnd dropped during headless
> + // or headless-synchronization
> + if ((cb->dnd_list.head != nullptr)) {
> + AVND_DND_MSG_LIST *pending_rec = 0;
> + TRACE("Attach msg_id of buffered msg");
> + bool found = true;
> + while (found) {
> + found = false;
> + for (pending_rec = cb->dnd_list.head; pending_rec !=
> nullptr; pending_rec = pending_rec->next) {
> + if (pending_rec->msg.type ==
> AVND_MSG_AVD) {
> + // At this moment, only oper_state
> msg needs to report to director
> + if (pending_rec->msg.info.avd-
> >msg_type == AVSV_N2D_INFO_SU_SI_ASSIGN_MSG &&
> + pending_rec->msg.info.avd-
> >msg_info.n2d_su_si_assign.msg_id == 0) {
> + m_AVND_DIQ_REC_POP(cb,
> pending_rec); #if 0
> + // only resend if this SUSI
> does exist
> + AVND_SU *su =
> m_AVND_SUDB_REC_GET(cb->sudb,
> + pending_rec-
> >msg.info.avd->msg_info.n2d_su_si_assign.su_name);
> + if (su != nullptr && su-
> >si_list.n_nodes > 0) { #endif
> + pending_rec-
> >msg.info.avd->msg_info.n2d_su_si_assign.msg_id = ++(cb->snd_msg_id);
> +
> m_AVND_DIQ_REC_PUSH(cb, pending_rec);
> + LOG_NO("Found and
> resend buffered su_si_assign msg for SU:'%s', "
> +
> "SI:'%s', ha_state:'%u', msg_act:'%u', single_csi:'%u', "
> +
> "error:'%u', msg_id:'%u'",
> +
> pending_rec->msg.info.avd-
> >msg_info.n2d_su_si_assign.su_name.value,
> +
> pending_rec->msg.info.avd-
> >msg_info.n2d_su_si_assign.si_name.value,
> +
> pending_rec->msg.info.avd->msg_info.n2d_su_si_assign.ha_state,
> +
> pending_rec->msg.info.avd->msg_info.n2d_su_si_assign.msg_act,
> +
> pending_rec->msg.info.avd->msg_info.n2d_su_si_assign.single_csi,
> +
> pending_rec->msg.info.avd->msg_info.n2d_su_si_assign.error,
> +
> pending_rec->msg.info.avd->msg_info.n2d_su_si_assign.msg_id);
> +
> +#if 0
> + } else {
> +
> avnd_msg_content_free(cb, &pending_rec->msg);
> + delete pending_rec;
> + pending_rec = cb-
> >dnd_list.head;
> + }
> +#endif
> + found = true;
> + }
> + }
> + }
> + }
> + TRACE("retransmit message to amfd");
> + for (pending_rec = cb->dnd_list.head; pending_rec != nullptr;
> pending_rec = pending_rec->next) {
> + avnd_diq_rec_send(cb, pending_rec);
> + }
> + }
> + TRACE_LEAVE();
> + return;
> +}
>
>
> /*************************************************************
> ***************
> Name : avnd_diq_rec_send
> diff --git a/osaf/services/saf/amf/amfnd/include/avnd_di.h
> b/osaf/services/saf/amf/amfnd/include/avnd_di.h
> --- a/osaf/services/saf/amf/amfnd/include/avnd_di.h
> +++ b/osaf/services/saf/amf/amfnd/include/avnd_di.h
> @@ -79,6 +79,7 @@ void avnd_di_msg_ack_process(struct avnd void
> avnd_diq_del(struct avnd_cb_tag *); AVND_DND_MSG_LIST
> *avnd_diq_rec_add(struct avnd_cb_tag *cb, AVND_MSG *msg); void
> avnd_diq_rec_del(struct avnd_cb_tag *cb, AVND_DND_MSG_LIST *rec);
> +void avnd_diq_rec_send_buffered_msg(struct avnd_cb_tag *cb);
> uint32_t avnd_diq_rec_send(struct avnd_cb_tag *cb, AVND_DND_MSG_LIST
> *rec); uint32_t avnd_di_reg_su_rsp_snd(struct avnd_cb_tag *cb, SaNameT
> *su_name, uint32_t ret_code); uint32_t avnd_di_ack_nack_msg_send(struct
> avnd_cb_tag *cb, uint32_t rcv_id, uint32_t view_num);
------------------------------------------------------------------------------
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel