osaf/services/saf/amf/amfnd/clc.cc | 12 ++++++
osaf/services/saf/amf/amfnd/sidb.cc | 8 ++++
osaf/services/saf/amf/amfnd/susm.cc | 72 +++++++++++++++++++++++++++++++++++++
3 files changed, 92 insertions(+), 0 deletions(-)
In the reported problem, OpenSAF shutdown got stuck when one of the components
of an NPI SU faulted.
During OpenSAF shutdown, amfnd started removing assignments from the lower-rank
SI2 assigned to SU2. During this time, a comp of NPI SU1, which had a CSI from
the higher-rank SI1, faulted. AMFND successfully cleaned up the failed
component. When all the assignments had been removed from SU2, amfnd started
removing assignments from SU1. Since SU1 had only one component, which had
already moved to UNINSTANTIATED state after the clean up, no further clean up
was required. Since no further comp was cleaned up in SU1, the SU got stuck in
TERMINATING state.
In a healthy condition, when the last CSI is removed from an NPI SU, the SU
moves to UNINSTANTIATED state. After this, if no further lower-rank SIs are
available for application SUs, amfnd launches clean up of all the components.
But since SU1 got stuck in TERMINATING state, AMFND could not launch clean up
of all the comps, and after 60 seconds NID rebooted the node.
The patch fixes the problem by resuming the SU FSM of the failed component and
removing further CSIs. If no CSI is available, it marks the SU UNINSTANTIATED
and continues with the normal sequence of removal followed by clean up of all
comps.
diff --git a/osaf/services/saf/amf/amfnd/clc.cc
b/osaf/services/saf/amf/amfnd/clc.cc
--- a/osaf/services/saf/amf/amfnd/clc.cc
+++ b/osaf/services/saf/amf/amfnd/clc.cc
@@ -2204,6 +2204,18 @@ uint32_t avnd_comp_clc_terming_cleansucc
}
}
+ if ((!comp->su->is_ncs) && (comp->csi_list.n_nodes > 0) &&
+ (!m_AVND_SU_IS_PREINSTANTIABLE(comp->su))) {
+ AVND_COMP_CSI_REC *csi = nullptr;
+ /*
+ Explanation written above for PI SU case is valid
here also.
+ However for a NPI comp in NPI SU, mark it REMOVED
instead of
+ generating remove done indication.
+ */
+ csi =
m_AVND_CSI_REC_FROM_COMP_DLL_NODE_GET(m_NCS_DBLIST_FIND_FIRST(&comp->csi_list));
+ if (csi != nullptr)
+ m_AVND_COMP_CSI_CURR_ASSIGN_STATE_SET(csi,
AVND_COMP_CSI_ASSIGN_STATE_REMOVED);
+ }
if (all_comps_terminated()) {
LOG_NO("Terminated all AMF components");
LOG_NO("Shutdown completed, exiting");
diff --git a/osaf/services/saf/amf/amfnd/sidb.cc
b/osaf/services/saf/amf/amfnd/sidb.cc
--- a/osaf/services/saf/amf/amfnd/sidb.cc
+++ b/osaf/services/saf/amf/amfnd/sidb.cc
@@ -185,6 +185,7 @@ AVND_SU_SI_REC *avnd_su_si_rec_add(AVND_
/*
* Update the rest of the parameters with default values.
*/
+ TRACE("Marking curr assigned state of '%s'
unassigned.",si_rec->name.value);
m_AVND_SU_SI_CURR_ASSIGN_STATE_SET(si_rec,
AVND_SU_SI_ASSIGN_STATE_UNASSIGNED);
/*
@@ -394,6 +395,7 @@ AVND_COMP_CSI_REC *avnd_su_si_csi_rec_ad
/*
* Update the rest of the parameters with default values.
*/
+ TRACE("Marking curr assigned state of '%s'
unassigned.",csi_rec->name.value);
m_AVND_COMP_CSI_CURR_ASSIGN_STATE_SET(csi_rec,
AVND_COMP_CSI_ASSIGN_STATE_UNASSIGNED);
m_AVND_COMP_CSI_PRV_ASSIGN_STATE_SET(csi_rec,
AVND_COMP_CSI_ASSIGN_STATE_UNASSIGNED);
@@ -477,6 +479,7 @@ AVND_SU_SI_REC *avnd_su_si_rec_modify(AV
/* store the prv assign-state & update the new assign-state */
si_rec->prv_assign_state = si_rec->curr_assign_state;
+ TRACE_1("Marking curr assigned state of '%s'
unassigned.",si_rec->name.value);
m_AVND_SU_SI_CURR_ASSIGN_STATE_SET(si_rec,
AVND_SU_SI_ASSIGN_STATE_UNASSIGNED);
m_AVND_SEND_CKPT_UPDT_ASYNC_UPDT(cb, si_rec, AVND_CKPT_SU_SI_REC);
@@ -518,6 +521,7 @@ uint32_t avnd_su_si_csi_rec_modify(AVND_
TRACE_ENTER2("%p", param);
/* pick up all the csis belonging to the si & modify them */
if (!param) {
+ TRACE_1("Marking curr assigned state of all CSIs of '%s'
unassigned.",si_rec->name.value);
for (curr_csi = (AVND_COMP_CSI_REC
*)m_NCS_DBLIST_FIND_FIRST(&si_rec->csi_list);
curr_csi; curr_csi = (AVND_COMP_CSI_REC
*)m_NCS_DBLIST_FIND_NEXT(&curr_csi->si_dll_node)) {
/* store the prv assign-state & update the new
assign-state */
@@ -544,6 +548,7 @@ uint32_t avnd_su_si_csi_rec_modify(AVND_
/* store the prv assign-state & update the new assign-state */
curr_csi->prv_assign_state = curr_csi->curr_assign_state;
+ TRACE("Marking curr assigned state of '%s'
unassigned.",curr_csi->name.value);
m_AVND_COMP_CSI_CURR_ASSIGN_STATE_SET(curr_csi,
AVND_COMP_CSI_ASSIGN_STATE_UNASSIGNED);
m_AVND_SEND_CKPT_UPDT_ASYNC_UPDT(cb, curr_csi,
AVND_CKPT_CSI_REC);
} /* for */
@@ -574,6 +579,7 @@ uint32_t avnd_su_si_all_modify(AVND_CB *
TRACE_ENTER2();
/* modify all the si records */
+ TRACE("Marking curr assigned state all SIs in '%s'
unassigned.",su->name.value);
for (curr_si = (AVND_SU_SI_REC *)m_NCS_DBLIST_FIND_FIRST(&su->si_list);
curr_si; curr_si = (AVND_SU_SI_REC
*)m_NCS_DBLIST_FIND_NEXT(&curr_si->su_dll_node)) {
/* store the prv state & update the new state */
@@ -624,6 +630,7 @@ uint32_t avnd_su_si_csi_all_modify(AVND_
TRACE_ENTER2("%p", param);
/* pick up all the csis belonging to all the sis & modify them */
if (!param) {
+ TRACE("Marking curr assigned state all CSIs in SIs of '%s'
unassigned.",su->name.value);
for (curr_si = (AVND_SU_SI_REC
*)m_NCS_DBLIST_FIND_FIRST(&su->si_list);
curr_si; curr_si = (AVND_SU_SI_REC
*)m_NCS_DBLIST_FIND_NEXT(&curr_si->su_dll_node)) {
for (curr_csi = (AVND_COMP_CSI_REC
*)m_NCS_DBLIST_FIND_FIRST(&curr_si->csi_list);
@@ -658,6 +665,7 @@ uint32_t avnd_su_si_csi_all_modify(AVND_
}
if (false == curr_comp->assigned_flag) {
/* modify all the csi-records */
+ TRACE("Marking curr assigned state all CSIs assigned to
'%s' unassigned.",curr_comp->name.value);
for (curr_csi =
m_AVND_CSI_REC_FROM_COMP_DLL_NODE_GET(m_NCS_DBLIST_FIND_FIRST(&curr_comp->csi_list));
curr_csi;
curr_csi =
m_AVND_CSI_REC_FROM_COMP_DLL_NODE_GET(m_NCS_DBLIST_FIND_NEXT(&curr_csi->comp_dll_node)))
diff --git a/osaf/services/saf/amf/amfnd/susm.cc
b/osaf/services/saf/amf/amfnd/susm.cc
--- a/osaf/services/saf/amf/amfnd/susm.cc
+++ b/osaf/services/saf/amf/amfnd/susm.cc
@@ -1917,6 +1917,19 @@ uint32_t avnd_su_pres_st_chng_prc(AVND_C
*/
avnd_di_uns32_upd_send(AVSV_SA_AMF_SU,
saAmfSUOperState_ID, &su->name, su->oper);
}
+
+ if ((prv_st == SA_AMF_PRESENCE_INSTANTIATED) &&
+ (final_st == SA_AMF_PRESENCE_UNINSTANTIATED)
&&
+ (cb->term_state ==
AVND_TERM_STATE_OPENSAF_SHUTDOWN_STARTED)) {
+ /*
+ During shutdown phase, all comps of SU may fault. In
that case,
+ SU FSM marks SU in TERMINATING state and finally
moves it to
+ UNINSTANTIATED state. So generate the assignment
done indication
+ so that removal of lower rank SI can proceed.
+ */
+ rc = avnd_su_si_oper_done(cb, su, si);
+ m_AVND_SU_ALL_SI_RESET(su);
+ }
}
done:
@@ -2264,6 +2277,21 @@ uint32_t avnd_su_pres_insting_compinstfa
return rc;
}
+/**
+ * @brief Returns the first CSI, searching the SI's CSI list from its end,
+ *        that is still assigned and whose component is instantiated.
+ *
+ * Walks si->csi_list backwards (last record towards the first) and stops at
+ * the first record for which m_AVND_COMP_CSI_CURR_ASSIGN_STATE_IS_ASSIGNED
+ * holds, whose comp pointer is non-null and whose component presence state
+ * is SA_AMF_PRESENCE_INSTANTIATED.
+ *
+ * @param si SI record whose csi_list is searched.
+ * @return Ptr to the matching csi_rec, or nullptr when no CSI qualifies;
+ *         callers must check for nullptr before dereferencing the result.
+ */
+static AVND_COMP_CSI_REC *get_next_assigned_csi_from_end(const AVND_SU_SI_REC
*si)
+{
+ for (AVND_COMP_CSI_REC *csi = (AVND_COMP_CSI_REC
*)m_NCS_DBLIST_FIND_LAST(&si->csi_list);
+ (csi != nullptr);
+ csi = (AVND_COMP_CSI_REC
*)m_NCS_DBLIST_FIND_PREV(&csi->si_dll_node)) {
+ if (m_AVND_COMP_CSI_CURR_ASSIGN_STATE_IS_ASSIGNED(csi) &&
((csi->comp != nullptr)
+ && (csi->comp->pres ==
SA_AMF_PRESENCE_INSTANTIATED)))
+ return csi;
+ }
+ return nullptr;
+}
/****************************************************************************
Name : avnd_su_pres_inst_suterm_hdler
@@ -2334,6 +2362,32 @@ uint32_t avnd_su_pres_inst_suterm_hdler(
AVND_COMP_CLC_PRES_FSM_EV_CLEANUP :
AVND_COMP_CLC_PRES_FSM_EV_TERM);
if (NCSCC_RC_SUCCESS != rc)
goto done;
+
+ /*
+ During shutdown phase if a component faults, it will be
cleaned up by AMFND
+ irrespective of recovery policy. This component will move to
UNINSTANTIATED
+ after successful clean up. When amfnd starts removing SI
from SU of this comp,
+ it will have to skip the CSI of cleaned up component.
+ */
+ if ((csi->comp->pres == SA_AMF_PRESENCE_UNINSTANTIATED) &&
+ (cb->term_state ==
AVND_TERM_STATE_OPENSAF_SHUTDOWN_STARTED)) {
+ m_AVND_COMP_CSI_CURR_ASSIGN_STATE_SET(csi,
AVND_COMP_CSI_ASSIGN_STATE_REMOVED);
+ avnd_su_pres_state_set(su, SA_AMF_PRESENCE_TERMINATING);
+ AVND_COMP_CSI_REC *assigned_csi =
get_next_assigned_csi_from_end(si);
+ if (assigned_csi == nullptr) {
+ //Components of all the CSIs in SI are cleaned
up.
+ avnd_su_pres_state_set(su,
SA_AMF_PRESENCE_UNINSTANTIATED);
+ goto done;
+ } else {
+ //One CSI is still assigned.
+
m_AVND_COMP_CSI_CURR_ASSIGN_STATE_SET(assigned_csi,
+
AVND_COMP_CSI_ASSIGN_STATE_REMOVING);
+ rc = avnd_comp_clc_fsm_trigger(cb,
assigned_csi->comp,
+
(m_AVND_COMP_IS_FAILED(assigned_csi->comp)) ?
+ AVND_COMP_CLC_PRES_FSM_EV_CLEANUP :
+ AVND_COMP_CLC_PRES_FSM_EV_TERM);
+ }
+ }
}
/* transition to terminating state */
@@ -2937,6 +2991,24 @@ uint32_t avnd_su_pres_terming_compuninst
if (all_csis_in_assigned_state(su) ||
all_csis_in_removed_state(su)) {
TRACE("SI Assignment done");
avnd_su_pres_state_set(su,
SA_AMF_PRESENCE_UNINSTANTIATED);
+ goto done;
+ }
+
+ /*
+ During shutdown phase if a component faults, it will be
cleaned up by AMFND
+ irrespective of recovery policy. This component will move to
UNINSTANTIATED
+ after successful clean up. When amfnd starts removing SI
from SU of this comp,
+ it will have to skip the CSI of cleaned up component.
+ */
+ if ((curr_csi != NULL) && (curr_csi->comp->pres ==
SA_AMF_PRESENCE_UNINSTANTIATED) &&
+ (cb->term_state ==
AVND_TERM_STATE_OPENSAF_SHUTDOWN_STARTED)) {
+ m_AVND_COMP_CSI_CURR_ASSIGN_STATE_SET(curr_csi,
AVND_COMP_CSI_ASSIGN_STATE_REMOVED);
+ AVND_COMP_CSI_REC *assigned_csi =
get_next_assigned_csi_from_end(curr_csi->si);
+ m_AVND_COMP_CSI_CURR_ASSIGN_STATE_SET(assigned_csi,
AVND_COMP_CSI_ASSIGN_STATE_REMOVING);
+ rc = avnd_comp_clc_fsm_trigger(cb, assigned_csi->comp,
+
(m_AVND_COMP_IS_FAILED(assigned_csi->comp)) ?
+ AVND_COMP_CLC_PRES_FSM_EV_CLEANUP :
+ AVND_COMP_CLC_PRES_FSM_EV_TERM);
}
}
------------------------------------------------------------------------------
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel