If a su failover happens just as both SCs go down, the faulty su is
auto repaired and can be instantiated during the SC absence period.
As a result, amfnd buffers an oper_state (ENABLED) message. If the su
then escalates to another su failover, amfnd only buffers an oper_state
(DISABLED) message and there will not be any auto repair for this su.
When an SC restarts, amfnd will now send two oper_state messages: the
ENABLED one comes first and is followed by the DISABLED one. The first
(ENABLED) oper_state message triggers a su instantiation, which
interferes with the message sequence of the auto repair triggered by
the second (DISABLED) oper_state message.
This patch ensures that only the DISABLED oper_state message is sent in
all cases. The ENABLED oper_state message is not needed, since all su
states are already sent in the sync state info messages.
The patch also removes the su_try_repair call in cluster.cc, which was
added in an earlier change. That call is not correct, since the su
recovery is started from amfnd, which resends the buffered DISABLED
oper_state message.
---
src/amf/amfd/cluster.cc | 14 --------------
src/amf/amfnd/di.cc | 27 ++++++++++++++++++---------
2 files changed, 18 insertions(+), 23 deletions(-)
diff --git a/src/amf/amfd/cluster.cc b/src/amf/amfd/cluster.cc
index 9ec6746a8..156c5c986 100644
--- a/src/amf/amfd/cluster.cc
+++ b/src/amf/amfd/cluster.cc
@@ -53,7 +53,6 @@ AVD_CLUSTER *avd_cluster = &_avd_cluster;
void avd_cluster_tmr_init_evh(AVD_CL_CB *cb, AVD_EVT *evt) {
TRACE_ENTER();
- AVD_SU *su = nullptr;
AVD_AVND *node = nullptr;
saflog(LOG_NOTICE, amfSvcUsrName,
"Cluster startup timeout, assigning SIs to SUs");
@@ -109,19 +108,6 @@ void avd_cluster_tmr_init_evh(AVD_CL_CB *cb,
AVD_EVT *evt) {
if (i_sg->sg_fsm_state == AVD_SG_FSM_STABLE) i_sg->realign(cb,
i_sg);
}
- if (cb->scs_absence_max_duration > 0) {
- TRACE("check if any SU is auto repair enabled");
-
- for (const auto &value : *su_db) {
- su = value.second;
-
- if (su->list_of_susi == nullptr && su->su_on_node != nullptr &&
- su->su_on_node->saAmfNodeOperState ==
SA_AMF_OPERATIONAL_ENABLED) {
- su_try_repair(su);
- }
- }
- }
-
done:
TRACE_LEAVE();
}
diff --git a/src/amf/amfnd/di.cc b/src/amf/amfnd/di.cc
index 34548be47..2dc023cdf 100644
--- a/src/amf/amfnd/di.cc
+++ b/src/amf/amfnd/di.cc
@@ -797,15 +797,20 @@ uint32_t avnd_di_oper_send(AVND_CB *cb, const
AVND_SU *su, uint32_t rcvr) {
msg.info.avd->msg_info.n2d_opr_state.rec_rcvr.raw = rcvr;
if (cb->is_avd_down == true || cb->amfd_sync_required == true) {
- // We are in headless, buffer this msg
- msg.info.avd->msg_info.n2d_opr_state.msg_id = 0;
- if (avnd_diq_rec_add(cb, &msg) == nullptr) {
- rc = NCSCC_RC_FAILURE;
+ if (msg.info.avd->msg_info.n2d_opr_state.su_oper_state ==
+ SA_AMF_OPERATIONAL_DISABLED ||
+ msg.info.avd->msg_info.n2d_opr_state.node_oper_state ==
+ SA_AMF_OPERATIONAL_DISABLED) {
+ // We are in headless, buffer this msg
+ msg.info.avd->msg_info.n2d_opr_state.msg_id = 0;
+ if (avnd_diq_rec_add(cb, &msg) == nullptr) {
+ rc = NCSCC_RC_FAILURE;
+ }
+ LOG_NO(
+ "avnd_di_oper_send() deferred as AMF director is
offline(%d),"
+ " or sync is required(%d)",
+ cb->is_avd_down, cb->amfd_sync_required);
}
- LOG_NO(
- "avnd_di_oper_send() deferred as AMF director is offline(%d),"
- " or sync is required(%d)",
- cb->is_avd_down, cb->amfd_sync_required);
} else {
// We are in normal cluster, send msg to director
msg.info.avd->msg_info.n2d_opr_state.msg_id = ++(cb->snd_msg_id);
@@ -1337,7 +1342,11 @@ void avnd_diq_rec_check_buffered_msg(AVND_CB
*cb) {
// leave in dnd_list
++iter;
continue;
- } else if (rec->msg.info.avd->msg_type ==
AVSV_N2D_OPERATION_STATE_MSG) {
+ } else if (rec->msg.info.avd->msg_type ==
AVSV_N2D_OPERATION_STATE_MSG &&
+ (rec->msg.info.avd->msg_info.n2d_opr_state.su_oper_state ==
+ SA_AMF_OPERATIONAL_DISABLED ||
+ rec->msg.info.avd->msg_info.n2d_opr_state.node_oper_state ==
+ SA_AMF_OPERATIONAL_DISABLED)) {
if (rec->msg.info.avd->msg_info.n2d_opr_state.msg_id != 0) {
rec->msg.info.avd->msg_info.n2d_opr_state.msg_id = 0;
LOG_NO(