osaf/services/saf/amf/amfnd/clc.cc  |  5 ++++-
 osaf/services/saf/amf/amfnd/err.cc  |  2 --
 osaf/services/saf/amf/amfnd/susm.cc |  5 ++++-
 3 files changed, 8 insertions(+), 4 deletions(-)


In the reported problem, a lock operation on an SU timed out when a quiesced
comp faulted with su-failover recovery.

AMFND calls avnd_err_su_repair() to repair the SU when su-failover recovery is 
going on.
When a quiesced comp faults with su-failover recovery, AMFND launches cleanup
of the components.
In the meantime, AMFND gets removal of assignments and, as a part of oper done,
it deletes the SUSI and calls avnd_err_su_repair(). Inside this function AMFND
tries to instantiate UNINSTANTIATED comps.
No component is instantiated, as they are in TERMINATING state, but the
SU_FAILOVER flag is reset inside this function. Since AMFND clears the flag,
it loses the context of su-failover escalation.
When the first comp is cleaned up, AMFND instantiates it, and thus the
condition for informing AMFD about su-failover escalation (all components are
terminated) is never met. Because of this, AMFD never responds to the lock
operation and it times out.

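As part of the fix, AMFND no longer calls avnd_err_su_repair() during
su-failover escalation, and no longer resets the SU_FAILOVER flag for
comp-failover recovery inside this function. Instead, the flag is reset in
avnd_comp_clc_terming_cleansucc_hdler() right after the comp-failover request
is sent to AMFD.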

diff --git a/osaf/services/saf/amf/amfnd/clc.cc 
b/osaf/services/saf/amf/amfnd/clc.cc
--- a/osaf/services/saf/amf/amfnd/clc.cc
+++ b/osaf/services/saf/amf/amfnd/clc.cc
@@ -2221,7 +2221,7 @@ uint32_t avnd_comp_clc_terming_termfail_
 ******************************************************************************/
 uint32_t avnd_comp_clc_terming_cleansucc_hdler(AVND_CB *cb, AVND_COMP *comp)
 {
-       const AVND_SU *su = comp->su;
+       AVND_SU *su = comp->su;
        uint32_t rc = NCSCC_RC_SUCCESS;
        TRACE_ENTER2("'%s': Cleanup success event in the terminating state", 
comp->name.value);
 
@@ -2294,6 +2294,9 @@ uint32_t avnd_comp_clc_terming_cleansucc
                        (m_AVND_SU_IS_FAILOVER(su))) {
                /* yes, request director to orchestrate component failover */
                rc = avnd_di_oper_send(cb, su, SA_AMF_COMPONENT_FAILOVER);
+
+               //Reset component-failover here. SU failover is reset as part 
of REPAIRED admin op.
+               m_AVND_SU_FAILOVER_RESET(su);
        }
 
        /*
diff --git a/osaf/services/saf/amf/amfnd/err.cc 
b/osaf/services/saf/amf/amfnd/err.cc
--- a/osaf/services/saf/amf/amfnd/err.cc
+++ b/osaf/services/saf/amf/amfnd/err.cc
@@ -1082,8 +1082,6 @@ uint32_t avnd_err_su_repair(AVND_CB *cb,
        if (all_comps_terminated_in_su(su) == true)
                is_uninst = true;
 
-       //Reset component-failover here. SU failover is reset as part of 
REPAIRED admin op.
-       m_AVND_SU_FAILOVER_RESET(su);
        /* scan & instantiate failed pi comps */
        for (comp = 
m_AVND_COMP_FROM_SU_DLL_NODE_GET(m_NCS_DBLIST_FIND_FIRST(&su->comp_list));
             comp; comp = 
m_AVND_COMP_FROM_SU_DLL_NODE_GET(m_NCS_DBLIST_FIND_NEXT(&comp->su_dll_node))) {
diff --git a/osaf/services/saf/amf/amfnd/susm.cc 
b/osaf/services/saf/amf/amfnd/susm.cc
--- a/osaf/services/saf/amf/amfnd/susm.cc
+++ b/osaf/services/saf/amf/amfnd/susm.cc
@@ -1135,9 +1135,12 @@ uint32_t avnd_su_si_oper_done(AVND_CB *c
 
                /* 
                 * Removal signifies an end to the recovery phase. Initiate 
repair
-                * unless a NODE level recovery action is in progress.
+                * unless a NODE level or su-failover recovery action is in 
progress.
+                * Also repair must be done after informing to AMFD about 
comp-failover and
+                * after removal of assignment.
                 */
                if (m_AVND_SU_IS_FAILED(su) && !su->si_list.n_nodes &&
+                     (!m_AVND_SU_IS_FAILOVER(su)) && (su->sufailover == false) 
&&
                    (cb->oper_state == SA_AMF_OPERATIONAL_ENABLED) && 
(!m_AVND_SU_IS_RESTART(su)))
                        rc = avnd_err_su_repair(cb, su);
        }

------------------------------------------------------------------------------
What NetFlow Analyzer can do for you? Monitors network bandwidth and traffic
patterns at an interface-level. Reveals which users, apps, and protocols are 
consuming the most bandwidth. Provides multi-vendor support for NetFlow, 
J-Flow, sFlow and other flows. Make informed decisions using capacity 
planning reports. https://ad.doubleclick.net/ddm/clk/305295220;132659582;e
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to