osaf/services/saf/amf/amfnd/clc.cc            |  9 +++++++++
 osaf/services/saf/amf/amfnd/di.cc             |  2 +-
 osaf/services/saf/amf/amfnd/err.cc            |  9 +++++----
 osaf/services/saf/amf/amfnd/include/avnd_di.h |  2 +-
 4 files changed, 16 insertions(+), 6 deletions(-)


If a component error is detected and the recovery action is COMPONENT_FAILOVER,
it is possible that a standby component gets the active assignment before the
erroneous component has been terminated. This can cause a split brain at the
application level.

The reason is that when the error is detected, amfnd starts two activities in
parallel: cleaning up the component and informing the director. As soon as the
director receives that information, it starts failing over the workload of the
erroneous component, without waiting for the cleanup to finish.

With this patch, amfnd informs the director only after the erroneous component
has been successfully terminated.

diff --git a/osaf/services/saf/amf/amfnd/clc.cc 
b/osaf/services/saf/amf/amfnd/clc.cc
--- a/osaf/services/saf/amf/amfnd/clc.cc
+++ b/osaf/services/saf/amf/amfnd/clc.cc
@@ -2024,6 +2024,7 @@ uint32_t avnd_comp_clc_terming_termfail_
 ******************************************************************************/
 uint32_t avnd_comp_clc_terming_cleansucc_hdler(AVND_CB *cb, AVND_COMP *comp)
 {
+       const AVND_SU *su = comp->su;
        uint32_t rc = NCSCC_RC_SUCCESS;
        TRACE_ENTER2("'%s': Cleanup success event in the terminating state", 
comp->name.value);
 
@@ -2074,6 +2075,14 @@ uint32_t avnd_comp_clc_terming_cleansucc
                m_AVND_COMP_REG_PARAM_RESET(cb, comp);
                m_AVND_SEND_CKPT_UPDT_ASYNC_UPDT(cb, comp, 
AVND_CKPT_COMP_CONFIG);
        }
+
+       /* determine if this is a case of component failover */
+       if (m_AVND_COMP_IS_FAILED(comp) && m_AVND_SU_IS_FAILED(su) &&
+                       m_AVND_SU_IS_PREINSTANTIABLE(su) && (su->sufailover == 
false)) {
+               /* yes, request director to orchestrate component failover */
+               rc = avnd_di_oper_send(cb, su, SA_AMF_COMPONENT_FAILOVER);
+       }
+
        TRACE_LEAVE();
        return rc;
 }
diff --git a/osaf/services/saf/amf/amfnd/di.cc 
b/osaf/services/saf/amf/amfnd/di.cc
--- a/osaf/services/saf/amf/amfnd/di.cc
+++ b/osaf/services/saf/amf/amfnd/di.cc
@@ -476,7 +476,7 @@ uint32_t avnd_evt_mds_avd_dn_evh(AVND_CB
  
   Notes         : None.
 ******************************************************************************/
-uint32_t avnd_di_oper_send(AVND_CB *cb, AVND_SU *su, uint32_t rcvr)
+uint32_t avnd_di_oper_send(AVND_CB *cb, const AVND_SU *su, uint32_t rcvr)
 {
        AVND_MSG msg;
        uint32_t rc = NCSCC_RC_SUCCESS;
diff --git a/osaf/services/saf/amf/amfnd/err.cc 
b/osaf/services/saf/amf/amfnd/err.cc
--- a/osaf/services/saf/amf/amfnd/err.cc
+++ b/osaf/services/saf/amf/amfnd/err.cc
@@ -702,9 +702,6 @@ uint32_t avnd_err_rcvr_comp_failover(AVN
        m_AVND_SU_OPER_STATE_SET(su, SA_AMF_OPERATIONAL_DISABLED);
        m_AVND_SEND_CKPT_UPDT_ASYNC_UPDT(cb, su, AVND_CKPT_SU_OPER_STATE);
 
-       /* inform AvD */
-       rc = avnd_di_oper_send(cb, su, SA_AMF_COMPONENT_FAILOVER);
-
        /*
         *  su-sis may be in assigning/removing state. signal csi
         * assign/remove done so that su-si assignment/removal algo can proceed.
@@ -722,11 +719,15 @@ uint32_t avnd_err_rcvr_comp_failover(AVN
        if (NCSCC_RC_SUCCESS != rc)
                goto done;
 
-       /* clean the failed comp */
+       // TODO: there should be no difference between PI/NPI comps
        if (m_AVND_SU_IS_PREINSTANTIABLE(su)) {
+               /* clean the failed comp */
                rc = avnd_comp_clc_fsm_run(cb, failed_comp, 
AVND_COMP_CLC_PRES_FSM_EV_CLEANUP);
                if (NCSCC_RC_SUCCESS != rc)
                        goto done;
+       } else  {
+               /* request director to orchestrate component failover */
+               rc = avnd_di_oper_send(cb, failed_comp->su, 
AVSV_ERR_RCVR_SU_FAILOVER);
        }
 
  done:
diff --git a/osaf/services/saf/amf/amfnd/include/avnd_di.h 
b/osaf/services/saf/amf/amfnd/include/avnd_di.h
--- a/osaf/services/saf/amf/amfnd/include/avnd_di.h
+++ b/osaf/services/saf/amf/amfnd/include/avnd_di.h
@@ -68,7 +68,7 @@
 
 struct avnd_cb_tag;
 
-uint32_t avnd_di_oper_send(struct avnd_cb_tag *, AVND_SU *, uint32_t);
+uint32_t avnd_di_oper_send(struct avnd_cb_tag *, const AVND_SU *, uint32_t);
 uint32_t avnd_di_susi_resp_send(struct avnd_cb_tag *, AVND_SU *, 
AVND_SU_SI_REC *);
 uint32_t avnd_di_object_upd_send(struct avnd_cb_tag *, AVSV_PARAM_INFO *);
 uint32_t avnd_di_pg_act_send(struct avnd_cb_tag *, SaNameT *, 
AVSV_PG_TRACK_ACT, bool);

------------------------------------------------------------------------------
Android apps run on BlackBerry 10
Introducing the new BlackBerry 10.2.1 Runtime for Android apps.
Now with support for Jelly Bean, Bluetooth, Mapview and more.
Get your Android app in front of a whole new audience.  Start now.
http://pubads.g.doubleclick.net/gampad/clk?id=124407151&iu=/4140/ostg.clktrk
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to