osaf/services/saf/amf/amfnd/di.cc | 20 ++++++++++++++++++++ osaf/services/saf/amf/amfnd/evt.cc | 4 +++- osaf/services/saf/amf/amfnd/include/avnd_cb.h | 1 + osaf/services/saf/amf/amfnd/include/avnd_evt.h | 1 + osaf/services/saf/amf/amfnd/mds.cc | 15 ++++++--------- osaf/services/saf/amf/amfnd/verify.cc | 2 ++ 6 files changed, 33 insertions(+), 10 deletions(-)
If AvD Adest UP comes after AvD Down, this means that the link has been reset between controller and payload. If a data verify message comes after AvD Adest Down and then AvD Adest Up comes, this means that it is a case of controller failover. This patch sets a flag when Amfnd receives AvD Adest Down and resets the flag when it gets a Data Verify message during failover. If AvD Adest UP comes after the Data Verify message, then Amfnd doesn't take any action in this context. But if AvD Adest Up comes just after AvD Adest Down, then Amfnd reboots itself. We have solved the problem of link flickering between the Act controller and the payload. The problem of TIPC link flickering among controllers still remains a concern. During testing of this patch, we saw problems in the controllers and cluster resets. This patch doesn't solve the problem of cluster reset caused by TIPC link flickering between the controllers. diff --git a/osaf/services/saf/amf/amfnd/di.cc b/osaf/services/saf/amf/amfnd/di.cc --- a/osaf/services/saf/amf/amfnd/di.cc +++ b/osaf/services/saf/amf/amfnd/di.cc @@ -386,6 +386,17 @@ done: uint32_t avnd_evt_mds_avd_up_evh(AVND_CB *cb, AVND_EVT *evt) { TRACE_ENTER2("%" PRIx64, evt->info.mds.mds_dest); + + /* If AvD UP event has come just after AvD DOWN for Act controller, then it is a case of + TIPC flicker */ + + if ((m_MDS_DEST_IS_AN_ADEST(evt->info.mds.mds_dest) && avnd_cb->cont_reboot_in_progress) && + (evt->info.mds.mds_dest == cb->active_avd_adest)) { + cb->reboot_in_progress = true; + opensaf_reboot(avnd_cb->node_info.nodeId, (char *)avnd_cb->node_info.executionEnvironment.value, + "Link reset with Act controller"); + goto done; + } /* Validate whether this is a ADEST or VDEST */ if (m_MDS_DEST_IS_AN_ADEST(evt->info.mds.mds_dest)) { @@ -437,6 +448,15 @@ uint32_t avnd_evt_mds_avd_dn_evh(AVND_CB TRACE_ENTER(); + if (m_MDS_DEST_IS_AN_ADEST(evt->info.mds.mds_dest)) { + if (evt->info.mds.node_id != ncs_get_node_id()) { + /* Ignore the other AVD Adest Down.*/ + 
if(evt->info.mds.mds_dest == cb->active_avd_adest) + avnd_cb->cont_reboot_in_progress = true; + return rc; + } + } + LOG_ER("AMF director unexpectedly crashed"); /* Don't issue reboot if it has been already issued.*/ diff --git a/osaf/services/saf/amf/amfnd/evt.cc b/osaf/services/saf/amf/amfnd/evt.cc --- a/osaf/services/saf/amf/amfnd/evt.cc +++ b/osaf/services/saf/amf/amfnd/evt.cc @@ -134,8 +134,10 @@ AVND_EVT *avnd_evt_create(AVND_CB *cb, break; /* mds event types */ + case AVND_EVT_MDS_AVD_DN: + evt->info.mds.node_id = *(uint32_t *)info; + /* Don't break, continue */ case AVND_EVT_MDS_AVD_UP: - case AVND_EVT_MDS_AVD_DN: case AVND_EVT_MDS_AVND_DN: case AVND_EVT_MDS_AVND_UP: evt->priority = NCS_IPC_PRIORITY_HIGH; /* bump up the priority */ diff --git a/osaf/services/saf/amf/amfnd/include/avnd_cb.h b/osaf/services/saf/amf/amfnd/include/avnd_cb.h --- a/osaf/services/saf/amf/amfnd/include/avnd_cb.h +++ b/osaf/services/saf/amf/amfnd/include/avnd_cb.h @@ -130,6 +130,7 @@ typedef struct avnd_cb_tag { SaBoolT first_time_up; bool reboot_in_progress; AVND_SU *failed_su; + bool cont_reboot_in_progress; } AVND_CB; #define AVND_CB_NULL ((AVND_CB *)0) diff --git a/osaf/services/saf/amf/amfnd/include/avnd_evt.h b/osaf/services/saf/amf/amfnd/include/avnd_evt.h --- a/osaf/services/saf/amf/amfnd/include/avnd_evt.h +++ b/osaf/services/saf/amf/amfnd/include/avnd_evt.h @@ -127,6 +127,7 @@ typedef struct avnd_tmr_evt { /* mds event definition */ typedef struct avnd_mds_evt { MDS_DEST mds_dest; /* mds address */ + NODE_ID node_id; } AVND_MDS_EVT; /* HA STATE change event definition */ diff --git a/osaf/services/saf/amf/amfnd/mds.cc b/osaf/services/saf/amf/amfnd/mds.cc --- a/osaf/services/saf/amf/amfnd/mds.cc +++ b/osaf/services/saf/amf/amfnd/mds.cc @@ -602,16 +602,13 @@ uint32_t avnd_mds_svc_evt(AVND_CB *cb, M case NCSMDS_DOWN: switch (evt_info->i_svc_id) { case NCSMDS_SVC_ID_AVD: - if (m_MDS_DEST_IS_AN_ADEST(evt_info->i_dest)) { - /* Supervise our node local director */ - if 
(evt_info->i_node_id != ncs_get_node_id()) { - /* Ignore the other AVD Adest Down.*/ - return rc; - } - } + if (m_MDS_DEST_IS_AN_ADEST(evt_info->i_dest) && (evt_info->i_node_id != ncs_get_node_id())) { + /* No action is required, Proceed ahead. */ + } else /* Reset the vdest as this node is going to get rebooted. */ + memset(&cb->avd_dest, 0, sizeof(MDS_DEST)); - memset(&cb->avd_dest, 0, sizeof(MDS_DEST)); - evt = avnd_evt_create(cb, AVND_EVT_MDS_AVD_DN, 0, &evt_info->i_dest, 0, 0, 0); + evt = avnd_evt_create(cb, AVND_EVT_MDS_AVD_DN, 0, &evt_info->i_dest, &evt_info->i_node_id, + 0, 0); break; case NCSMDS_SVC_ID_AVA: diff --git a/osaf/services/saf/amf/amfnd/verify.cc b/osaf/services/saf/amf/amfnd/verify.cc --- a/osaf/services/saf/amf/amfnd/verify.cc +++ b/osaf/services/saf/amf/amfnd/verify.cc @@ -92,6 +92,8 @@ uint32_t avnd_evt_avd_verify_evh(AVND_CB bool msg_found = false; TRACE_ENTER2("Data Verify message received from newly ACTIVE AVD"); + /* We need to reset the flag as it looks failover case. */ + avnd_cb->cont_reboot_in_progress = false; info = &evt->info.avd->msg_info.d2n_data_verify; ------------------------------------------------------------------------------ Shape the Mobile Experience: Free Subscription Software experts and developers: Be at the forefront of tech innovation. Intel(R) Software Adrenaline delivers strategic insight and game-changing conversations that shape the rapidly evolving mobile landscape. Sign up now. http://pubads.g.doubleclick.net/gampad/clk?id=63431311&iu=/4140/ostg.clktrk _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel