osaf/services/saf/amf/amfnd/di.cc              |  20 ++++++++++++++++++++
 osaf/services/saf/amf/amfnd/evt.cc             |   4 +++-
 osaf/services/saf/amf/amfnd/include/avnd_cb.h  |   1 +
 osaf/services/saf/amf/amfnd/include/avnd_evt.h |   1 +
 osaf/services/saf/amf/amfnd/mds.cc             |  15 ++++++---------
 osaf/services/saf/amf/amfnd/verify.cc          |   2 ++
 6 files changed, 33 insertions(+), 10 deletions(-)


If AvD Adest Up comes right after AvD Adest Down, this means that the link has
been reset between controller and payload.
If a Data Verify message comes after AvD Adest Down and then AvD Adest Up comes,
this means that it is a case
of controller failover.
This patch sets a flag when Amfnd receives AvD Adest Down and resets the flag
when it gets a Data Verify message
during failover. If AvD Adest Up comes after the Data Verify message, then Amfnd
doesn't take any action in this context.
But if AvD Adest Up comes just after AvD Adest Down, then Amfnd reboots itself.

We have solved the problem of link flickering between the active controller and
a payload.
The problem of TIPC link flickering among the controllers still remains a concern.
During testing of this patch, we saw problems in the controllers and cluster
resets. This patch doesn't solve the problem of cluster reset caused by TIPC
link flickering between the controllers.

diff --git a/osaf/services/saf/amf/amfnd/di.cc 
b/osaf/services/saf/amf/amfnd/di.cc
--- a/osaf/services/saf/amf/amfnd/di.cc
+++ b/osaf/services/saf/amf/amfnd/di.cc
@@ -386,6 +386,17 @@ done:
 uint32_t avnd_evt_mds_avd_up_evh(AVND_CB *cb, AVND_EVT *evt)
 {
        TRACE_ENTER2("%" PRIx64, evt->info.mds.mds_dest);
+
+       /* If AvD UP event has come just after AvD DOWN for Act controller, 
then it is a case of
+          TIPC flicker */
+
+       if ((m_MDS_DEST_IS_AN_ADEST(evt->info.mds.mds_dest) && 
avnd_cb->cont_reboot_in_progress) &&
+                       (evt->info.mds.mds_dest == cb->active_avd_adest)) {
+               cb->reboot_in_progress = true;
+               opensaf_reboot(avnd_cb->node_info.nodeId, (char 
*)avnd_cb->node_info.executionEnvironment.value,
+                               "Link reset with Act controller");
+               goto done;
+       }
        
        /* Validate whether this is a ADEST or VDEST */
        if (m_MDS_DEST_IS_AN_ADEST(evt->info.mds.mds_dest)) {
@@ -437,6 +448,15 @@ uint32_t avnd_evt_mds_avd_dn_evh(AVND_CB
 
        TRACE_ENTER();
 
+       if (m_MDS_DEST_IS_AN_ADEST(evt->info.mds.mds_dest)) {
+               if (evt->info.mds.node_id != ncs_get_node_id()) {
+                       /* Ignore the other AVD Adest Down.*/
+                       if(evt->info.mds.mds_dest == cb->active_avd_adest)
+                               avnd_cb->cont_reboot_in_progress = true;
+                       return rc;
+               }
+       }
+
        LOG_ER("AMF director unexpectedly crashed");
 
        /* Don't issue reboot if it has been already issued.*/
diff --git a/osaf/services/saf/amf/amfnd/evt.cc 
b/osaf/services/saf/amf/amfnd/evt.cc
--- a/osaf/services/saf/amf/amfnd/evt.cc
+++ b/osaf/services/saf/amf/amfnd/evt.cc
@@ -134,8 +134,10 @@ AVND_EVT *avnd_evt_create(AVND_CB *cb,
                break;
 
                /* mds event types */
+       case AVND_EVT_MDS_AVD_DN:
+               evt->info.mds.node_id = *(uint32_t *)info;
+               /* Don't break, continue */
        case AVND_EVT_MDS_AVD_UP:
-       case AVND_EVT_MDS_AVD_DN:
        case AVND_EVT_MDS_AVND_DN:
        case AVND_EVT_MDS_AVND_UP:
                evt->priority = NCS_IPC_PRIORITY_HIGH;  /* bump up the priority 
*/
diff --git a/osaf/services/saf/amf/amfnd/include/avnd_cb.h 
b/osaf/services/saf/amf/amfnd/include/avnd_cb.h
--- a/osaf/services/saf/amf/amfnd/include/avnd_cb.h
+++ b/osaf/services/saf/amf/amfnd/include/avnd_cb.h
@@ -130,6 +130,7 @@ typedef struct avnd_cb_tag {
        SaBoolT first_time_up;
        bool reboot_in_progress;
        AVND_SU *failed_su;
+       bool cont_reboot_in_progress;
 } AVND_CB;
 
 #define AVND_CB_NULL ((AVND_CB *)0)
diff --git a/osaf/services/saf/amf/amfnd/include/avnd_evt.h 
b/osaf/services/saf/amf/amfnd/include/avnd_evt.h
--- a/osaf/services/saf/amf/amfnd/include/avnd_evt.h
+++ b/osaf/services/saf/amf/amfnd/include/avnd_evt.h
@@ -127,6 +127,7 @@ typedef struct avnd_tmr_evt {
 /* mds event definition */
 typedef struct avnd_mds_evt {
        MDS_DEST mds_dest;      /* mds address */
+       NODE_ID node_id;
 } AVND_MDS_EVT;
 
 /* HA STATE change event definition */
diff --git a/osaf/services/saf/amf/amfnd/mds.cc 
b/osaf/services/saf/amf/amfnd/mds.cc
--- a/osaf/services/saf/amf/amfnd/mds.cc
+++ b/osaf/services/saf/amf/amfnd/mds.cc
@@ -602,16 +602,13 @@ uint32_t avnd_mds_svc_evt(AVND_CB *cb, M
        case NCSMDS_DOWN:
                switch (evt_info->i_svc_id) {
                case NCSMDS_SVC_ID_AVD:
-                       if (m_MDS_DEST_IS_AN_ADEST(evt_info->i_dest)) {
-                               /* Supervise our node local director */
-                               if (evt_info->i_node_id != ncs_get_node_id()) {
-                                       /* Ignore the other AVD Adest Down.*/
-                                       return rc;
-                               }
-                       }
+                       if (m_MDS_DEST_IS_AN_ADEST(evt_info->i_dest) && 
(evt_info->i_node_id != ncs_get_node_id())) {
+                               /* No action is required, Proceed ahead. */
+                       } else /* Reset the vdest as this node is going to get 
rebooted. */ 
+                               memset(&cb->avd_dest, 0, sizeof(MDS_DEST));
 
-                       memset(&cb->avd_dest, 0, sizeof(MDS_DEST));
-                       evt = avnd_evt_create(cb, AVND_EVT_MDS_AVD_DN, 0, 
&evt_info->i_dest, 0, 0, 0);
+                       evt = avnd_evt_create(cb, AVND_EVT_MDS_AVD_DN, 0, 
&evt_info->i_dest, &evt_info->i_node_id,
+                                       0, 0);
                        break;
 
                case NCSMDS_SVC_ID_AVA:
diff --git a/osaf/services/saf/amf/amfnd/verify.cc 
b/osaf/services/saf/amf/amfnd/verify.cc
--- a/osaf/services/saf/amf/amfnd/verify.cc
+++ b/osaf/services/saf/amf/amfnd/verify.cc
@@ -92,6 +92,8 @@ uint32_t avnd_evt_avd_verify_evh(AVND_CB
        bool msg_found = false;
 
        TRACE_ENTER2("Data Verify message received from newly ACTIVE AVD");
+       /* We need to reset the flag as it looks failover case. */
+       avnd_cb->cont_reboot_in_progress = false;
 
        info = &evt->info.avd->msg_info.d2n_data_verify;
 

------------------------------------------------------------------------------
Shape the Mobile Experience: Free Subscription
Software experts and developers: Be at the forefront of tech innovation.
Intel(R) Software Adrenaline delivers strategic insight and game-changing 
conversations that shape the rapidly evolving mobile landscape. Sign up now. 
http://pubads.g.doubleclick.net/gampad/clk?id=63431311&iu=/4140/ostg.clktrk
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to