Hi Minh,
It can be reproduced. Refer on ticket for more info.
See my respond inline.
-----Original Message-----
From: Minh Hon Chau <minh.c...@dektech.com.au>
Sent: Wednesday, August 12, 2020 4:24 PM
To: Thang Duc Nguyen <thang.d.ngu...@dektech.com.au>; Thuan Tran
<thuan.t...@dektech.com.au>
Cc: opensaf-devel@lists.sourceforge.net
Subject: Re: [PATCH 1/1] amfnd: handle component failover during SURestart
[#3207]
Hi Thang,
Is this reproducible? And one comment inline.
Thanks
Minh
On 7/8/20 12:03 pm, thang.d.nguyen wrote:
During SURestart, another compoenent is failovered. The SU-SI modify
event message need buffering to process later.
The new active assignment on SU need processing after it is
instantiated.
---
src/amf/amfnd/comp.cc | 3 ++-
src/amf/amfnd/susm.cc | 62 ++++++++++++++++++++++++++++++-------------
2 files changed, 46 insertions(+), 19 deletions(-)
diff --git a/src/amf/amfnd/comp.cc b/src/amf/amfnd/comp.cc index
d805346bb..f1e33c372 100644
--- a/src/amf/amfnd/comp.cc
+++ b/src/amf/amfnd/comp.cc
@@ -1083,7 +1083,8 @@ uint32_t avnd_comp_csi_assign(AVND_CB *cb, AVND_COMP
*comp,
if (curr_csi->curr_assign_state ==
AVND_COMP_CSI_ASSIGN_STATE_UNASSIGNED &&
curr_csi->prv_assign_state ==
- AVND_COMP_CSI_ASSIGN_STATE_UNASSIGNED) {
+ AVND_COMP_CSI_ASSIGN_STATE_UNASSIGNED &&
+ !m_AVND_SU_IS_RESTART(comp->su)) {
// Mark suspending_assignment for all unassigned csi(s) which are
// going to be assigned to *curr_csi->comp*
for (t_csi = m_AVND_CSI_REC_FROM_COMP_DLL_NODE_GET(
diff --git a/src/amf/amfnd/susm.cc b/src/amf/amfnd/susm.cc index
86811f1e4..4ff433ea2 100644
--- a/src/amf/amfnd/susm.cc
+++ b/src/amf/amfnd/susm.cc
@@ -218,10 +218,13 @@ AVND_SU_SIQ_REC *avnd_su_siq_rec_buf(AVND_CB *cb, AVND_SU
*su,
uint32_t rc = NCSCC_RC_SUCCESS;
TRACE_ENTER2("'%s'", su->name.c_str());
- /* buffer the msg, if SU is inst-failed and all comps are not
terminated */
+ /* buffer the msg, if SU is inst-failed and all comps are not
+ terminated or during SuRestart */
if (((su->pres == SA_AMF_PRESENCE_INSTANTIATION_FAILED) &&
(!m_AVND_SU_IS_ALL_TERM(su))) ||
- m_AVND_IS_SHUTTING_DOWN(cb)) {
+ m_AVND_IS_SHUTTING_DOWN(cb) ||
+ (m_AVND_SU_IS_RESTART(su) &&
+ su_all_comps_restartable(*su))) {
siq = avnd_su_siq_rec_add(cb, su, param, &rc);
TRACE_LEAVE();
return siq;
@@ -303,15 +306,18 @@ uint32_t avnd_su_siq_prc(AVND_CB *cb, AVND_SU *su) {
return rc;
}
- /* unlink the buffered msg from the queue */
- ncs_db_link_list_delink(&su->siq, &siq->su_dll_node);
-
/* initiate si asignment / removal */
rc = avnd_su_si_msg_prc(cb, su, &siq->info);
- /* delete the buffered msg */
- avnd_su_siq_rec_del(cb, su, siq);
-
+ // Siq will used to su-si respond later // in case modify SU-SI
+ during SURestart if ((siq->info.msg_act != AVSV_SUSI_ACT_MOD) ||
+ !m_AVND_SU_IS_RESTART(su)) {
+ /* unlink the buffered msg from the queue */
+ ncs_db_link_list_delink(&su->siq, &siq->su_dll_node);
+ /* delete the buffered msg */
+ avnd_su_siq_rec_del(cb, su, siq); }
[M] Is the above change needed?
[Thang]: yes. The purpose to keep the siq info to process later. In case SU is
restarted and receive modify SU-SI event.
TRACE_LEAVE2("%u", rc);
return rc;
}
@@ -1128,6 +1134,7 @@ static bool container_contained_shutdown(const AVND_SU
*su) {
uint32_t avnd_su_si_oper_done(AVND_CB *cb, AVND_SU *su, AVND_SU_SI_REC *si) {
AVND_SU_SI_REC *curr_si = 0;
AVND_COMP_CSI_REC *curr_csi = 0, *t_csi = 0;
+ AVND_SU_SIQ_REC *siq = 0;
uint32_t rc = NCSCC_RC_SUCCESS;
bool opr_done;
@@ -1199,6 +1206,18 @@ uint32_t avnd_su_si_oper_done(AVND_CB *cb, AVND_SU *su, AVND_SU_SI_REC *si) {
if (NCSCC_RC_SUCCESS != rc) goto done;
}
+ // Modify event during SURestart should be respond siq =
+ reinterpret_cast<AVND_SU_SIQ_REC
+ *>(m_NCS_DBLIST_FIND_LAST(&su->siq));
+ if (siq && (siq->info.msg_act == AVSV_SUSI_ACT_MOD) &&
+ m_AVND_SU_IS_RESTART(su)) {
+ ncs_db_link_list_delink(&su->siq, &siq->su_dll_node);
+ /* delete the buffered msg */
+ avnd_su_siq_rec_del(avnd_cb, su, siq);
+ rc = avnd_di_susi_resp_send(cb, su,
+ m_AVND_SU_IS_ALL_SI(su) ? nullptr : si);
+ if (NCSCC_RC_SUCCESS != rc) goto done; }
+
if (si && (cb->term_state == AVND_TERM_STATE_OPENSAF_SHUTDOWN_INITIATED)) {
(void)avnd_evt_last_step_term_evh(cb, nullptr);
} else if (si &&
@@ -1683,16 +1702,23 @@ static uint32_t
pi_su_instantiating_to_instantiated(AVND_SU *su) {
/* reset the su failed flag & set the oper state to enabled */
m_AVND_SU_OPER_STATE_SET(su, SA_AMF_OPERATIONAL_ENABLED);
TRACE("Setting the Oper state to Enabled");
- /*
- * reassign all the sis...
- * it's possible that the si was never assigned. send su-oper
- * enable msg instead.
- */
- if (su->si_list.n_nodes)
- rc = avnd_su_si_reassign(avnd_cb, su);
- else {
- rc = avnd_di_oper_send(avnd_cb, su, 0);
- reset_suRestart_flag(su);
+
+ AVND_SU_SIQ_REC *siq = 0;
+ siq = reinterpret_cast<AVND_SU_SIQ_REC
*>(m_NCS_DBLIST_FIND_LAST(&su->siq));
+ if (siq && (siq->info.msg_act == AVSV_SUSI_ACT_MOD)) {
+ rc = avnd_su_siq_prc(avnd_cb, su);
+ } else {
+ /*
+ * reassign all the sis...
+ * it's possible that the si was never assigned. send su-oper
+ * enable msg instead.
+ */
+ if (su->si_list.n_nodes)
+ rc = avnd_su_si_reassign(avnd_cb, su);
+ else {
+ rc = avnd_di_oper_send(avnd_cb, su, 0);
+ reset_suRestart_flag(su);
+ }
}
su->admin_op_Id = static_cast<SaAmfAdminOperationIdT>(0);
} else {