src/fm/fmd/fm_evt.h  |    2 +-
 src/fm/fmd/fm_main.c |   78 ++++++---------------
 src/fm/fmd/fm_mds.c  |  181 ++++++++++++++++++++++++++++++++++++--------------
 3 files changed, 155 insertions(+), 106 deletions(-)


diff --git a/src/fm/fmd/fm_evt.h b/src/fm/fmd/fm_evt.h
--- a/src/fm/fmd/fm_evt.h
+++ b/src/fm/fmd/fm_evt.h
@@ -1,6 +1,7 @@
 /*      -*- OpenSAF  -*-
 *
 * (C) Copyright 2008 The OpenSAF Foundation
+* Copyright (C) 2017, Oracle and/or its affiliates. All rights reserved.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
@@ -49,7 +50,6 @@ typedef enum {
        FM_EVT_NODE_DOWN,
        FM_EVT_PEER_UP,
        FM_EVT_RDA_ROLE,
-       FM_EVT_SVC_DOWN,
        FM_FSM_EVT_MAX
 } FM_FSM_EVT_CODE;
 
diff --git a/src/fm/fmd/fm_main.c b/src/fm/fmd/fm_main.c
--- a/src/fm/fmd/fm_main.c
+++ b/src/fm/fmd/fm_main.c
@@ -1,6 +1,7 @@
 /*      -*- OpenSAF  -*-
 *
 * (C) Copyright 2008 The OpenSAF Foundation
+* Copyright (C) 2017, Oracle and/or its affiliates. All rights reserved.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
@@ -31,6 +32,7 @@ This file contains the main() routine fo
 #include "nid/agent/nid_api.h"
 #include "fm.h"
 #include "base/osaf_time.h"
+#include "base/osaf_poll.h"
 
 #define FM_CLM_API_TIMEOUT 10000000000LL
 
@@ -71,7 +73,6 @@ void handle_mbx_event(void);
 extern uint32_t fm_amf_init(FM_AMF_CB *fm_amf_cb);
 uint32_t gl_fm_hdl;
 static NCS_SEL_OBJ usr1_sel_obj;
-void fm_proc_svc_down(FM_CB *cb, FM_EVT *fm_mbx_evt);
 
 /**
  * USR1 signal is used when AMF wants instantiate us as a
@@ -176,6 +177,11 @@ int main(int argc, char *argv[])
         */
        fm_cb->control_tipc = true; /* Default behaviour */
 
+       fm_cb->immd_down = true; 
+       fm_cb->immnd_down = true; 
+       fm_cb->amfnd_down = true; 
+       fm_cb->amfd_down = true;
+
        /* Create CB handle */
        gl_fm_hdl = ncshm_create_hdl(NCS_HM_POOL_ID_COMMON, NCS_SERVICE_ID_GFM, 
(NCSCONTEXT)fm_cb);
 
@@ -194,7 +200,7 @@ int main(int argc, char *argv[])
                goto fm_init_failed;
        }
 
-/* Attach MBX */
+       /* Attach MBX */
        if (m_NCS_IPC_ATTACH(&fm_cb->mbx) != NCSCC_RC_SUCCESS) {
                syslog(LOG_ERR, "m_NCS_IPC_ATTACH() failed.");
                goto fm_init_failed;
@@ -268,7 +274,7 @@ int main(int argc, char *argv[])
  
        /* notify the NID */
        if (nid_started)
-               fm_nid_notify(NCSCC_RC_SUCCESS);
+               fm_nid_notify((uint32_t) NCSCC_RC_SUCCESS);
 
        while (1) {
                ret = poll(fds, nfds, -1);
@@ -454,52 +460,6 @@ static uint32_t fm_get_args(FM_CB *fm_cb
        return NCSCC_RC_SUCCESS;
 }
 
-void fm_proc_svc_down(FM_CB *cb, FM_EVT *fm_mbx_evt)
-{
-       switch (fm_mbx_evt->svc_id) {
-               case NCSMDS_SVC_ID_IMMND:
-                       cb->immnd_down = true;
-                       LOG_NO("IMMND down on: %x", cb->peer_node_id);
-                       break;
-               case NCSMDS_SVC_ID_AVND:
-                       cb->amfnd_down = true;
-                       LOG_NO("AMFND down on: %x", cb->peer_node_id);
-                       break;
-               case NCSMDS_SVC_ID_IMMD:
-                       cb->immd_down = true;
-                       LOG_NO("IMMD down on: %x", cb->peer_node_id);
-                       break;
-               case NCSMDS_SVC_ID_AVD:
-                       cb->amfd_down = true;
-                       LOG_NO("AVD down on: %x", cb->peer_node_id);
-                       break;
-               case NCSMDS_SVC_ID_GFM:
-                       cb->fm_down = true;
-                       LOG_NO("FM down on: %x", cb->peer_node_id);
-                       break;
-               default:
-                       break;
-       }
-
-       /* Processing only for alternate node.
-       * Service downs of AMFND, IMMD, IMMND is the same as NODE_DOWN from 4.4 
onwards.
-       * This is required to handle the usecase involving
-       * '/etc/init.d/opensafd stop' without an OS reboot cycle
-       * Process service downs only if OpenSAF is not controlling TIPC.
-       * If OpenSAF is controlling TIPC, just wait for NODE_DOWN to trigger 
failover.
-       */
-       if (cb->immd_down && cb->immnd_down && cb->amfnd_down && cb->amfd_down 
&& cb->fm_down) {
-               LOG_NO("Core services went down on node_id: %x", 
fm_mbx_evt->node_id);
-               fm_send_node_down_to_mbx(cb, fm_mbx_evt->node_id);
-               /* Reset peer downs, because we've made MDS RED subscriptions */
-               cb->immd_down = false;
-               cb->immnd_down = false;
-               cb->amfnd_down = false;
-               cb->amfd_down = false;
-               cb->fm_down = false;
-       }
-}
-
 /****************************************************************************
 * Name          : fm_clm_init
 *
@@ -642,11 +602,18 @@ static void fm_mbx_msg_handler(FM_CB *fm
                        }
                }
                break;
-       case FM_EVT_SVC_DOWN:
-               fm_proc_svc_down(fm_cb, fm_mbx_evt);
-               break;
+
        case FM_EVT_PEER_UP:
-/* Peer fm came up so sending ee_id of this node */
+               /* Unusual situation in a cluster, where the new-Active 
controller node finds that the peer node
+                * (old-Active) is still in the process of shutting down (i.e., 
amfd/immd is still alive).
+                */
+               if ((fm_cb->role == PCS_RDA_ACTIVE) && (fm_cb->csi_assigned == 
false)) {
+                       LOG_ER("Two active controllers observed in a cluster, 
newActive: %x and old-Active: %x", fm_cb->node_id, fm_cb->peer_node_id);
+                       opensaf_reboot(fm_cb->peer_node_id, NULL,
+                       "Received svc up from peer node (old-active is not 
fully DOWN), hence rebooting the new Active");
+               }
+
+               /* Peer fm came up so sending ee_id of this node */
                if (fm_cb->node_name.length != 0)
                        fms_fms_exchange_node_info(fm_cb);
 
@@ -654,8 +621,9 @@ static void fm_mbx_msg_handler(FM_CB *fm
                        get_peer_clm_node_name(fm_mbx_evt->node_id);
                }
                break;
+
        case FM_EVT_TMR_EXP:
-/* Timer Expiry event posted */
+               /* Timer Expiry event posted */
                if (fm_mbx_evt->info.fm_tmr->type == FM_TMR_PROMOTE_ACTIVE) {
                        /* Check whether node(AMF) initialization is done */
                        if (fm_cb->csi_assigned == false) {
@@ -684,9 +652,11 @@ static void fm_mbx_msg_handler(FM_CB *fm
                                       "within the time limit");
                }
                break;
+
        case FM_EVT_RDA_ROLE:
                fm_evt_proc_rda_callback(fm_cb, fm_mbx_evt);
                break;
+
        default:
                break;
        }
diff --git a/src/fm/fmd/fm_mds.c b/src/fm/fmd/fm_mds.c
--- a/src/fm/fmd/fm_mds.c
+++ b/src/fm/fmd/fm_mds.c
@@ -1,6 +1,7 @@
 /*      -*- OpenSAF  -*-
 *
 * (C) Copyright 2008 The OpenSAF Foundation
+* Copyright (C) 2017, Oracle and/or its affiliates. All rights reserved.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
@@ -34,6 +35,7 @@ static void check_for_node_isolation(FM_
 static bool has_been_well_connected_recently(FM_CB *cb);
 static uint32_t fm_mds_node_evt(FM_CB *cb, MDS_CALLBACK_NODE_EVENT_INFO * 
node_evt);
 static uint32_t fm_fill_mds_evt_post_fm_mbx(FM_CB *cb, FM_EVT *fm_evt, NODE_ID 
node_id, FM_FSM_EVT_CODE evt_code);
+static void fm_proc_svc_down(FM_CB *cb, uint32_t node_id, NCSMDS_SVC_ID 
svc_id);
 
 uint32_t
 fm_mds_sync_send(FM_CB *fm_cb, NCSCONTEXT msg,
@@ -62,7 +64,7 @@ uint32_t fm_mds_init(FM_CB *cb)
 {
        NCSMDS_INFO arg;
        MDS_SVC_ID svc_id[] = { NCSMDS_SVC_ID_GFM, NCSMDS_SVC_ID_AVND, 
NCSMDS_SVC_ID_IMMND };
-       MDS_SVC_ID immd_id[2] = { NCSMDS_SVC_ID_IMMD, NCSMDS_SVC_ID_AVD };
+       MDS_SVC_ID svc_red_id[2] = { NCSMDS_SVC_ID_IMMD, NCSMDS_SVC_ID_AVD };
 
 /* Get the MDS handles to be used. */
        if (fm_mds_get_adest_hdls(cb) != NCSCC_RC_SUCCESS) {
@@ -111,7 +113,7 @@ uint32_t fm_mds_init(FM_CB *cb)
         arg.i_op = MDS_RED_SUBSCRIBE;
         arg.info.svc_subscribe.i_num_svcs = 2;
         arg.info.svc_subscribe.i_scope = NCSMDS_SCOPE_NONE;
-        arg.info.svc_subscribe.i_svc_ids = immd_id;
+        arg.info.svc_subscribe.i_svc_ids = svc_red_id;
         if (ncsmds_api(&arg) == NCSCC_RC_FAILURE) {
                syslog(LOG_ERR, "MDS_RED_SUBSCRIBE failed");
                arg.i_op = MDS_UNINSTALL;
@@ -285,25 +287,52 @@ uint32_t fm_send_node_down_to_mbx(FM_CB 
        return rc;
 }
 
-static void fm_send_svc_down_to_mbx(FM_CB *cb, uint32_t node_id, NCSMDS_SVC_ID 
svc_id)
+void fm_proc_svc_down(FM_CB *cb, uint32_t node_id, NCSMDS_SVC_ID svc_id)
 {
-       FM_EVT *fm_evt = NULL;
-       uint32_t rc = NCSCC_RC_SUCCESS;
-       fm_evt = m_MMGR_ALLOC_FM_EVT;
-       if (NULL == fm_evt) {
-               syslog(LOG_INFO, "fm_mds_rcv_evt: fm_evt allocation FAILED.");
-               return;
+       TRACE_ENTER2("SVC ID: %d", (int) svc_id);
+       switch (svc_id) {
+               case NCSMDS_SVC_ID_IMMND:
+                       cb->immnd_down = true;
+                       LOG_NO("IMMND down on: %x", cb->peer_node_id);
+                       break;
+               case NCSMDS_SVC_ID_AVND:
+                       cb->amfnd_down = true;
+                       LOG_NO("AMFND down on: %x", cb->peer_node_id);
+                       break;
+               case NCSMDS_SVC_ID_IMMD:
+                       cb->immd_down = true;
+                       LOG_NO("IMMD down on: %x", cb->peer_node_id);
+                       break;
+               case NCSMDS_SVC_ID_AVD:
+                       cb->amfd_down = true;
+                       LOG_NO("AVD down on: %x", cb->peer_node_id);
+                       break;
+               case NCSMDS_SVC_ID_GFM:
+                       cb->fm_down = true;
+                       LOG_NO("FM down on: %x", cb->peer_node_id);
+                       break;
+               default:
+                       break;
        }
-       fm_evt->svc_id = svc_id;
-       rc = fm_fill_mds_evt_post_fm_mbx(cb, fm_evt, node_id, FM_EVT_SVC_DOWN);
-       if (rc == NCSCC_RC_FAILURE) {
-               m_MMGR_FREE_FM_EVT(fm_evt);
-               LOG_IN("service down event post to mailbox failed");
-               fm_evt = NULL;
+
+       /* Processing only for alternate node.
+        * Service downs of AMFND, IMMD and IMMND are the same as NODE_DOWN from 
4.4 onwards.
+        * This is required to handle the usecase involving
+        * '/etc/init.d/opensafd stop' without an OS reboot cycle
+        * Process service downs only if OpenSAF is not controlling TIPC.
+        * If OpenSAF is controlling TIPC, just wait for NODE_DOWN to trigger 
failover.
+        */
+       if (cb->immd_down && cb->immnd_down && cb->amfnd_down && cb->amfd_down 
&& cb->fm_down) {
+               LOG_NO("Core services went down on node_id: %x", node_id);
+               
+               if(!cb->control_tipc) 
+                       fm_send_node_down_to_mbx(cb, node_id);
        }
-       return;
+
+       TRACE_LEAVE();  
 }
 
+
 static void check_for_node_isolation(FM_CB *cb)
 {
        bool well_connected = cb->peer_sc_up && cb->cluster_size >= 3;
@@ -393,8 +422,7 @@ static uint32_t fm_mds_node_evt(FM_CB *c
 *****************************************************************************/
 static uint32_t fm_mds_svc_evt(FM_CB *cb, MDS_CALLBACK_SVC_EVENT_INFO *svc_evt)
 {
-       uint32_t return_val = NCSCC_RC_SUCCESS;
-       FM_EVT *fm_evt;
+       FM_EVT *fm_evt = NULL;
        TRACE_ENTER();
 
        if (NULL == svc_evt) {
@@ -413,43 +441,29 @@ static uint32_t fm_mds_svc_evt(FM_CB *cb
                                        cb->peer_sc_up = false;
                                        check_for_node_isolation(cb);
                                        cb->peer_adest = 0;
-                                       if (!cb->control_tipc) {
-                                               fm_send_svc_down_to_mbx(cb, 
svc_evt->i_node_id, svc_evt->i_svc_id);
-                                       }
+
+                                       fm_proc_svc_down(cb, 
svc_evt->i_node_id, svc_evt->i_svc_id);
                                }
                                break;
                        case NCSMDS_SVC_ID_IMMND:
-                               if (svc_evt->i_node_id == cb->peer_node_id
-                                                       && !cb->control_tipc) {
-                                       fm_send_svc_down_to_mbx(cb, 
svc_evt->i_node_id, svc_evt->i_svc_id);
-                               }
-                               break;
                        case NCSMDS_SVC_ID_AVND:
-                               if (svc_evt->i_node_id == cb->peer_node_id
-                                                       && !cb->control_tipc) {
-                                       fm_send_svc_down_to_mbx(cb, 
svc_evt->i_node_id, svc_evt->i_svc_id);
+                               if (svc_evt->i_node_id == cb->peer_node_id) {
+                                       fm_proc_svc_down(cb, 
svc_evt->i_node_id, svc_evt->i_svc_id);
                                }
                                break;
                        default:
                                TRACE("Not interested in service down of other 
services");
                                break;
                }
-
                break;
 
        case NCSMDS_RED_DOWN:
                switch (svc_evt->i_svc_id) {
                        /* Depend on service downs if OpenSAF is not controling 
TIPC */
                        case NCSMDS_SVC_ID_IMMD:
-                               if (svc_evt->i_node_id == cb->peer_node_id
-                                                       && !cb->control_tipc) {
-                                       fm_send_svc_down_to_mbx(cb, 
svc_evt->i_node_id, svc_evt->i_svc_id);
-                               }
-                               break;
                        case NCSMDS_SVC_ID_AVD:
-                               if (svc_evt->i_node_id == cb->peer_node_id
-                                                       && !cb->control_tipc) {
-                                       fm_send_svc_down_to_mbx(cb, 
svc_evt->i_node_id, svc_evt->i_svc_id);
+                               if (svc_evt->i_node_id == cb->peer_node_id) {
+                                       fm_proc_svc_down(cb, 
svc_evt->i_node_id, svc_evt->i_svc_id);
                                }
                                break;
                        default:
@@ -465,43 +479,108 @@ static uint32_t fm_mds_svc_evt(FM_CB *cb
                                TRACE("Peer fm status change: %d -> %d, peer 
node id is: %x, cluster size is %llu",
                                      (int) cb->peer_sc_up, 1, 
svc_evt->i_node_id, (unsigned long long) cb->cluster_size);
                                cb->peer_sc_up = true;
+                               cb->fm_down = false;
                                check_for_node_isolation(cb);
 
                                fm_evt = m_MMGR_ALLOC_FM_EVT;
-                               if (NULL == fm_evt) {
-                                       syslog(LOG_INFO, "fm_mds_svc_evt: 
fm_evt allocation FAILED.");
-                                       return NCSCC_RC_FAILURE;
-                               }
+                               if (NULL == fm_evt) {
+                                       syslog(LOG_INFO, "fm_mds_svc_evt: 
fm_evt allocation FAILED.");
+                                       return NCSCC_RC_FAILURE;
+                               }
+
                                cb->peer_adest = svc_evt->i_dest;
                                cb->peer_node_id = svc_evt->i_node_id;
                                cb->peer_node_terminated = false;
-                               return_val = fm_fill_mds_evt_post_fm_mbx(cb, 
fm_evt, cb->peer_node_id, FM_EVT_PEER_UP);
 
-                               if (NCSCC_RC_FAILURE == return_val) {
-                                       m_MMGR_FREE_FM_EVT(fm_evt);
-                                       fm_evt = NULL;
-                               }
+                               if(fm_fill_mds_evt_post_fm_mbx(cb, fm_evt, 
cb->peer_node_id, FM_EVT_PEER_UP) == NCSCC_RC_FAILURE)
+                               {
+                                       m_MMGR_FREE_FM_EVT(fm_evt);
+                                       fm_evt = NULL;
+                               }                       
                        }
                        break;
+
                case NCSMDS_SVC_ID_IMMND:
-                               if (svc_evt->i_node_id == cb->peer_node_id
-                                                       && !cb->control_tipc)
-                                       cb->immnd_down = false; /* Only IMMND 
is restartable */
+                       if (svc_evt->i_node_id == cb->peer_node_id){
+                               TRACE("Peer immnd status change: %d -> %d, peer 
node id is: %x, cluster size is %llu",
+                                     (int) cb->peer_sc_up, 1, 
svc_evt->i_node_id, (unsigned long long) cb->cluster_size);
+                               cb->immnd_down = false;
+                       }
+                       break;
+
+               case NCSMDS_SVC_ID_AVND:
+                       if (svc_evt->i_node_id == cb->peer_node_id){
+                               TRACE("Peer amfnd status change: %d -> %d, peer 
node id is: %x, cluster size is %llu",
+                                     (int) cb->peer_sc_up, 1, 
svc_evt->i_node_id, (unsigned long long) cb->cluster_size);
+                               cb->amfnd_down = false;
+                       }
                        break;
                default:
                        break;
                }
                break;
 
+       case NCSMDS_RED_UP:
+               switch (svc_evt->i_svc_id) {
+               /* Depend on service downs if OpenSAF is not controlling TIPC */
+               case NCSMDS_SVC_ID_IMMD:
+                       if (svc_evt->i_node_id != cb->node_id) {
+                               TRACE("Peer immd status change: %d -> %d, peer 
node id is: %x, cluster size is %llu",
+                                     (int) cb->peer_sc_up, 1, 
svc_evt->i_node_id, (unsigned long long) cb->cluster_size);
+                               cb->peer_node_id = svc_evt->i_node_id;
+                               cb->immd_down = false;
+
+                               fm_evt = m_MMGR_ALLOC_FM_EVT;
+                               if (NULL == fm_evt) {
+                                       syslog(LOG_INFO, "fm_mds_svc_evt: 
fm_evt allocation FAILED.");
+                                       return NCSCC_RC_FAILURE;
+                               }
+
+                               if(fm_fill_mds_evt_post_fm_mbx(cb, fm_evt, 
cb->peer_node_id, FM_EVT_PEER_UP) == NCSCC_RC_FAILURE)
+                               {
+                                       m_MMGR_FREE_FM_EVT(fm_evt);
+                                       fm_evt = NULL;
+                               }                       
+                       }       
+                       break;
+
+               case NCSMDS_SVC_ID_AVD:
+                       if (svc_evt->i_node_id != cb->node_id) {
+                               TRACE("Peer amfd status change: %d -> %d, peer 
node id is: %x, cluster size is %llu",
+                                     (int) cb->peer_sc_up, 1, 
svc_evt->i_node_id, (unsigned long long) cb->cluster_size);
+                               cb->peer_node_id = svc_evt->i_node_id;
+                               cb->amfd_down = false;
+
+                               fm_evt = m_MMGR_ALLOC_FM_EVT;
+                               if (NULL == fm_evt) {
+                                       syslog(LOG_INFO, "fm_mds_svc_evt: 
fm_evt allocation FAILED.");
+                                       return NCSCC_RC_FAILURE;
+                               }
+
+                               if(fm_fill_mds_evt_post_fm_mbx(cb, fm_evt, 
cb->peer_node_id, FM_EVT_PEER_UP) == NCSCC_RC_FAILURE)
+                               {
+                                       m_MMGR_FREE_FM_EVT(fm_evt);
+                                       fm_evt = NULL;
+                               }                       
+                       }       
+                       break;
+
+               default:
+                       TRACE("Not interested in service down of other 
services");
+                       break;
+               }
+               break;
+
        default:
                syslog(LOG_INFO, "Wrong MDS event");
                break;
        }
 
        TRACE_LEAVE();
-       return return_val;
+       return NCSCC_RC_SUCCESS;
 }
 
+
 /***************************************************************************
 * Name          : fm_mds_rcv_evt 
 *

------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, SlashDot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to