osaf/services/saf/amf/amfnd/clc.cc | 3 +-
osaf/services/saf/amf/amfnd/su.cc | 1 -
osaf/services/saf/amf/amfnd/susm.cc | 56 +++++++-----------------------------
3 files changed, 13 insertions(+), 47 deletions(-)
Possible split-brain at the application level and a spec violation.
The AMF node director requests a comp/SU failover from the AMF
director even though a component is in the TERM-FAILED presence state.
Instead, disable the SU and let the AMF director handle a possible
node reboot or manual repair.
Repair works up to the point where the components are instantiated again, but they are not re-assigned.
diff --git a/osaf/services/saf/amf/amfnd/clc.cc
b/osaf/services/saf/amf/amfnd/clc.cc
--- a/osaf/services/saf/amf/amfnd/clc.cc
+++ b/osaf/services/saf/amf/amfnd/clc.cc
@@ -927,8 +927,7 @@ uint32_t avnd_comp_clc_st_chng_prc(AVND_
}
if ((SA_AMF_PRESENCE_RESTARTING == prv_st) &&
- ((SA_AMF_PRESENCE_INSTANTIATION_FAILED == final_st) ||
- (SA_AMF_PRESENCE_TERMINATION_FAILED == final_st))) {
+ (SA_AMF_PRESENCE_INSTANTIATION_FAILED == final_st)) {
avnd_instfail_su_failover(cb, comp->su, comp);
}
diff --git a/osaf/services/saf/amf/amfnd/su.cc
b/osaf/services/saf/amf/amfnd/su.cc
--- a/osaf/services/saf/amf/amfnd/su.cc
+++ b/osaf/services/saf/amf/amfnd/su.cc
@@ -645,7 +645,6 @@ uint32_t avnd_evt_su_admin_op_req(AVND_C
m_AVND_SU_STATE_RESET(su);
m_AVND_SU_OPER_STATE_SET(su, SA_AMF_OPERATIONAL_ENABLED);
- avnd_di_uns32_upd_send(AVSV_SA_AMF_SU, saAmfSUOperState_ID,
&su->name, su->oper);
avnd_su_pres_state_set(su, SA_AMF_PRESENCE_UNINSTANTIATED);
rc = avnd_di_oper_send(cb, su, 0);
diff --git a/osaf/services/saf/amf/amfnd/susm.cc
b/osaf/services/saf/amf/amfnd/susm.cc
--- a/osaf/services/saf/amf/amfnd/susm.cc
+++ b/osaf/services/saf/amf/amfnd/susm.cc
@@ -1542,16 +1542,13 @@ uint32_t avnd_su_pres_st_chng_prc(AVND_C
goto done;
}
- /* instantiating -> term-failed */
- if ((SA_AMF_PRESENCE_INSTANTIATING == prv_st) &&
- (SA_AMF_PRESENCE_TERMINATION_FAILED ==
final_st)) {
- TRACE("SU Instantiating -> Termination Failed");
+ /* xxx -> term-failed */
+ if (final_st == SA_AMF_PRESENCE_TERMINATION_FAILED) {
m_AVND_SU_OPER_STATE_SET(su,
SA_AMF_OPERATIONAL_DISABLED);
m_AVND_SEND_CKPT_UPDT_ASYNC_UPDT(cb, su,
AVND_CKPT_SU_OPER_STATE);
- /* Don't send su-oper state msg, just update su oper
state
- * AMF has lost control over this component and the
operator needs
- * to repair this node. Failover is not possible in
this state. */
- avnd_di_uns32_upd_send(AVSV_SA_AMF_SU,
saAmfSUOperState_ID, &su->name, su->oper);
+ rc = avnd_di_oper_send(cb, su, 0);
+ if (NCSCC_RC_SUCCESS != rc)
+ goto done;
}
/* instantiated/restarting -> inst-failed */
@@ -1571,27 +1568,6 @@ uint32_t avnd_su_pres_st_chng_prc(AVND_C
else
TRACE("SU oper state is disabled");
}
-
- /* terminating -> term-failed */
- if (((prv_st == SA_AMF_PRESENCE_RESTARTING) ||
(SA_AMF_PRESENCE_TERMINATING == prv_st))
- && (SA_AMF_PRESENCE_TERMINATION_FAILED ==
final_st)) {
- TRACE("Terminating -> Termination Failed");
- if (sufailover_in_progress(su)) {
- /*Do not reset any flag, this will be done as a
part of repair.*/
- rc = avnd_di_oper_send(cb, su,
AVSV_ERR_RCVR_SU_FAILOVER);
- osafassert(NCSCC_RC_SUCCESS == rc);
- avnd_su_si_del(avnd_cb, &su->name);
- goto done;
- }
- m_AVND_SU_OPER_STATE_SET(su,
SA_AMF_OPERATIONAL_DISABLED);
- m_AVND_SEND_CKPT_UPDT_ASYNC_UPDT(cb, su,
AVND_CKPT_SU_OPER_STATE);
- /* inform AvD about oper state change */
- rc = avnd_di_oper_send(cb, su,
SA_AMF_COMPONENT_FAILOVER);
- if (NCSCC_RC_SUCCESS != rc)
- goto done;
-
- }
-
}
/* npi su */
@@ -1663,24 +1639,16 @@ uint32_t avnd_su_pres_st_chng_prc(AVND_C
}
}
- /* terminating/instantiated/restarting -> term-failed */
- if (((SA_AMF_PRESENCE_TERMINATING == prv_st) ||
- (SA_AMF_PRESENCE_INSTANTIATED == prv_st) ||
- (SA_AMF_PRESENCE_RESTARTING == prv_st)) &&
(SA_AMF_PRESENCE_TERMINATION_FAILED == final_st)) {
- TRACE("Terminating/Instantiated/Restarting ->
Termination Failed");
- if (sufailover_in_progress(su)) {
- /*Do not reset any flag, this will be done as
a part of repair.*/
- rc = avnd_di_oper_send(cb, su,
AVSV_ERR_RCVR_SU_FAILOVER);
- osafassert(NCSCC_RC_SUCCESS == rc);
- avnd_su_si_del(avnd_cb, &su->name);
- goto done;
- }
+ /* xxx -> term-failed */
+ if (final_st == SA_AMF_PRESENCE_TERMINATION_FAILED) {
m_AVND_SU_OPER_STATE_SET(su,
SA_AMF_OPERATIONAL_DISABLED);
m_AVND_SEND_CKPT_UPDT_ASYNC_UPDT(cb, su,
AVND_CKPT_SU_OPER_STATE);
- /* inform AvD about oper state change */
- rc = avnd_di_oper_send(cb, su,
SA_AMF_COMPONENT_FAILOVER);
+ /* Don't send su-oper state msg, just update su oper
state
+ * AMF has lost control over this component and the
operator needs
+ * to repair this node. Failover is not possible in
this state. */
+ avnd_di_uns32_upd_send(AVSV_SA_AMF_SU,
saAmfSUOperState_ID, &su->name, su->oper);
- /* si assignment/removal failed.. inform AvD */
+ /* SI removal failed.. inform director */
rc = avnd_di_susi_resp_send(cb, su,
m_AVND_SU_IS_ALL_SI(su) ? 0 : si);
}
}
------------------------------------------------------------------------------
HPCC Systems Open Source Big Data Platform from LexisNexis Risk Solutions
Find What Matters Most in Your Big Data with HPCC Systems
Open Source. Fast. Scalable. Simple. Ideal for Dirty Data.
Leverages Graph Analysis for Fast Processing & Easy Data Exploration
http://www.hpccsystems.com
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel