On 24-Feb-17 4:07 PM, ramesh betham wrote: > Good catch. Hitting the case of fm_peer_down_wait() is very unlikely. > > But here fm_peer_down_wait() is called only before fm nid_notifies and > considering for amfnd-up event too. A rare and race condition can hit > where fm on upcoming new active receives fm-down event and amfnd is > still alive. > But the if block where cb->amfnd_down is set to false assumes that cb->peer_node_id has already been set by the RED_UP event of IMMD or AVD. Is there any guarantee from MDS that the RED_UP event will always arrive before the normal MDS_UP event?
Thanks, Praveen > Thanks, > Ramesh. > > On 2/24/2017 2:18 PM, praveen malviya wrote: >> Hi Ramesh, >> >> One minor query: >> In RED_UP of peer AVD, newly active SC will reboot itself if peer FM >> on old active SC is not up. If this true then in which situations >> newly active SC will wait in fm_peer_down_wait(). >> >> Thanks, >> Praveen >> >> >> On 22-Feb-17 5:00 PM, ramesh.bet...@oracle.com wrote: >>> src/fm/fmd/fm_cb.h | 3 + >>> src/fm/fmd/fm_evt.h | 2 +- >>> src/fm/fmd/fm_main.c | 114 +++++++++++++++++--------------- >>> src/fm/fmd/fm_mds.c | 173 >>> +++++++++++++++++++++++++++++++++++--------------- >>> 4 files changed, 186 insertions(+), 106 deletions(-) >>> >>> >>> This patch addresses the specific scenario where the new Active is >>> coming up and has discovered the afmd process on the peer node (which >>> is going down) is still alive. Here the peer amfd/amfnd is still in >>> the process of going down i.e., progressing in termination of >>> application components having big timeouts etc. >>> >>> diff --git a/src/fm/fmd/fm_cb.h b/src/fm/fmd/fm_cb.h >>> --- a/src/fm/fmd/fm_cb.h >>> +++ b/src/fm/fmd/fm_cb.h >>> @@ -1,6 +1,7 @@ >>> /* -*- OpenSAF -*- >>> * >>> * (C) Copyright 2008 The OpenSAF Foundation >>> +* Copyright (C) 2017, Oracle and/or its affiliates. All rights >>> reserved. >>> * >>> * This program is distributed in the hope that it will be useful, but >>> * WITHOUT ANY WARRANTY; without even the implied warranty of >>> MERCHANTABILITY >>> @@ -107,6 +108,8 @@ typedef struct fm_cb { >>> bool use_remote_fencing; >>> SaNameT peer_clm_node_name; >>> bool peer_node_terminated; >>> + NCS_SEL_OBJ peer_down_obj; >>> + int peer_down_await; >>> } FM_CB; >>> >>> extern char *role_string[]; >>> diff --git a/src/fm/fmd/fm_evt.h b/src/fm/fmd/fm_evt.h >>> --- a/src/fm/fmd/fm_evt.h >>> +++ b/src/fm/fmd/fm_evt.h >>> @@ -1,6 +1,7 @@ >>> /* -*- OpenSAF -*- >>> * >>> * (C) Copyright 2008 The OpenSAF Foundation >>> +* Copyright (C) 2017, Oracle and/or its affiliates. 
All rights >>> reserved. >>> * >>> * This program is distributed in the hope that it will be useful, but >>> * WITHOUT ANY WARRANTY; without even the implied warranty of >>> MERCHANTABILITY >>> @@ -49,7 +50,6 @@ typedef enum { >>> FM_EVT_NODE_DOWN, >>> FM_EVT_PEER_UP, >>> FM_EVT_RDA_ROLE, >>> - FM_EVT_SVC_DOWN, >>> FM_FSM_EVT_MAX >>> } FM_FSM_EVT_CODE; >>> >>> diff --git a/src/fm/fmd/fm_main.c b/src/fm/fmd/fm_main.c >>> --- a/src/fm/fmd/fm_main.c >>> +++ b/src/fm/fmd/fm_main.c >>> @@ -1,6 +1,7 @@ >>> /* -*- OpenSAF -*- >>> * >>> * (C) Copyright 2008 The OpenSAF Foundation >>> +* Copyright (C) 2017, Oracle and/or its affiliates. All rights >>> reserved. >>> * >>> * This program is distributed in the hope that it will be useful, but >>> * WITHOUT ANY WARRANTY; without even the implied warranty of >>> MERCHANTABILITY >>> @@ -31,6 +32,7 @@ This file contains the main() routine fo >>> #include "nid/agent/nid_api.h" >>> #include "fm.h" >>> #include "base/osaf_time.h" >>> +#include "base/osaf_poll.h" >>> >>> #define FM_CLM_API_TIMEOUT 10000000000LL >>> >>> @@ -71,7 +73,6 @@ void handle_mbx_event(void); >>> extern uint32_t fm_amf_init(FM_AMF_CB *fm_amf_cb); >>> uint32_t gl_fm_hdl; >>> static NCS_SEL_OBJ usr1_sel_obj; >>> -void fm_proc_svc_down(FM_CB *cb, FM_EVT *fm_mbx_evt); >>> >>> /** >>> * USR1 signal is used when AMF wants instantiate us as a >>> @@ -119,6 +120,40 @@ static void rda_cb(uint32_t cb_hdl, PCS_ >>> TRACE_LEAVE(); >>> } >>> >>> +/* This function primarily handles the weird situation in a cluster >>> where the controller >>> + * node which is coming up identifies the peer node is in the midst >>> of DOWN process (i.e., >>> + * non-existance of peer FM and amfd/amfnd is still alive). In this >>> case, the controller >>> + * node has to wait till the peer gracefully shutdowns. This >>> function returns FAILURE if >>> + * peer controller node is not down in a timeout period of >>> OPENSAF_TERMTIMEOUT (or 60 secs default). 
>>> + */ >>> +static uint32_t fm_peer_down_wait(FM_CB *fm_cb) >>> +{ >>> + char *envVar = NULL; >>> + int peer_term_timeout = 60; /*default 60 secs */ >>> + >>> + TRACE_ENTER(); >>> + >>> + /* Hoping that "OPENSAF_TERMTIMEOUT" on both the controllers >>> shall be the same */ >>> + if ((envVar = getenv("OPENSAF_TERMTIMEOUT"))) >>> + peer_term_timeout = atoi(envVar); >>> + >>> + m_NCS_SEL_OBJ_CREATE(&fm_cb->peer_down_obj); >>> + fm_cb->peer_down_await = 1; >>> + >>> + osaf_poll_one_fd(m_GET_FD_FROM_SEL_OBJ(fm_cb->peer_down_obj), >>> peer_term_timeout*1000); >>> + >>> + m_NCS_SEL_OBJ_DESTROY(&fm_cb->peer_down_obj); >>> + >>> + /* Return failure if peer node is not yet completely down */ >>> + if(fm_cb->peer_down_await) { >>> + LOG_ER("Peer node is not fully DOWN, please check"); >>> + TRACE_LEAVE(); >>> + return NCSCC_RC_FAILURE; >>> + } >>> + >>> + TRACE_LEAVE(); >>> + return NCSCC_RC_SUCCESS; >>> +} >>> >>> >>> /***************************************************************************** >>> >>> >>> @@ -176,6 +211,11 @@ int main(int argc, char *argv[]) >>> */ >>> fm_cb->control_tipc = true; /* Default behaviour */ >>> >>> + fm_cb->immd_down = true; >>> + fm_cb->immnd_down = true; >>> + fm_cb->amfnd_down = true; >>> + fm_cb->amfd_down = true; >>> + >>> /* Create CB handle */ >>> gl_fm_hdl = ncshm_create_hdl(NCS_HM_POOL_ID_COMMON, >>> NCS_SERVICE_ID_GFM, (NCSCONTEXT)fm_cb); >>> >>> @@ -194,7 +234,7 @@ int main(int argc, char *argv[]) >>> goto fm_init_failed; >>> } >>> >>> -/* Attach MBX */ >>> + /* Attach MBX */ >>> if (m_NCS_IPC_ATTACH(&fm_cb->mbx) != NCSCC_RC_SUCCESS) { >>> syslog(LOG_ERR, "m_NCS_IPC_ATTACH() failed."); >>> goto fm_init_failed; >>> @@ -245,6 +285,16 @@ int main(int argc, char *argv[]) >>> goto fm_init_failed; >>> } >>> >>> + /* Weird and rare situation. If peer fm doesn't exist, but >>> amfd/amfnd process(es) >>> + * are still alive then wait till the peer gracefully shutsdown. 
>>> + */ >>> + if((!fm_cb->peer_sc_up) && !(fm_cb->amfnd_down && >>> fm_cb->amfd_down)) { >>> + if(fm_peer_down_wait(fm_cb) != NCSCC_RC_SUCCESS) { >>> + LOG_ER("Exiting.. Peer node is not completely >>> DOWN, please check"); >>> + goto fm_init_failed; >>> + } >>> + } >>> + >>> /* Get mailbox selection object */ >>> mbx_sel_obj = m_NCS_IPC_GET_SEL_OBJ(&fm_cb->mbx); >>> >>> @@ -268,7 +318,7 @@ int main(int argc, char *argv[]) >>> >>> /* notify the NID */ >>> if (nid_started) >>> - fm_nid_notify(NCSCC_RC_SUCCESS); >>> + fm_nid_notify((uint32_t) NCSCC_RC_SUCCESS); >>> >>> while (1) { >>> ret = poll(fds, nfds, -1); >>> @@ -454,52 +504,6 @@ static uint32_t fm_get_args(FM_CB *fm_cb >>> return NCSCC_RC_SUCCESS; >>> } >>> >>> -void fm_proc_svc_down(FM_CB *cb, FM_EVT *fm_mbx_evt) >>> -{ >>> - switch (fm_mbx_evt->svc_id) { >>> - case NCSMDS_SVC_ID_IMMND: >>> - cb->immnd_down = true; >>> - LOG_NO("IMMND down on: %x", cb->peer_node_id); >>> - break; >>> - case NCSMDS_SVC_ID_AVND: >>> - cb->amfnd_down = true; >>> - LOG_NO("AMFND down on: %x", cb->peer_node_id); >>> - break; >>> - case NCSMDS_SVC_ID_IMMD: >>> - cb->immd_down = true; >>> - LOG_NO("IMMD down on: %x", cb->peer_node_id); >>> - break; >>> - case NCSMDS_SVC_ID_AVD: >>> - cb->amfd_down = true; >>> - LOG_NO("AVD down on: %x", cb->peer_node_id); >>> - break; >>> - case NCSMDS_SVC_ID_GFM: >>> - cb->fm_down = true; >>> - LOG_NO("FM down on: %x", cb->peer_node_id); >>> - break; >>> - default: >>> - break; >>> - } >>> - >>> - /* Processing only for alternate node. >>> - * Service downs of AMFND, IMMD, IMMND is the same as NODE_DOWN >>> from 4.4 onwards. >>> - * This is required to handle the usecase involving >>> - * '/etc/init.d/opensafd stop' without an OS reboot cycle >>> - * Process service downs only if OpenSAF is not controlling TIPC. >>> - * If OpenSAF is controlling TIPC, just wait for NODE_DOWN to >>> trigger failover. 
>>> - */ >>> - if (cb->immd_down && cb->immnd_down && cb->amfnd_down && >>> cb->amfd_down && cb->fm_down) { >>> - LOG_NO("Core services went down on node_id: %x", >>> fm_mbx_evt->node_id); >>> - fm_send_node_down_to_mbx(cb, fm_mbx_evt->node_id); >>> - /* Reset peer downs, because we've made MDS RED >>> subscriptions */ >>> - cb->immd_down = false; >>> - cb->immnd_down = false; >>> - cb->amfnd_down = false; >>> - cb->amfd_down = false; >>> - cb->fm_down = false; >>> - } >>> -} >>> - >>> >>> /**************************************************************************** >>> >>> * Name : fm_clm_init >>> * >>> @@ -642,11 +646,10 @@ static void fm_mbx_msg_handler(FM_CB *fm >>> } >>> } >>> break; >>> - case FM_EVT_SVC_DOWN: >>> - fm_proc_svc_down(fm_cb, fm_mbx_evt); >>> - break; >>> + >>> case FM_EVT_PEER_UP: >>> -/* Peer fm came up so sending ee_id of this node */ >>> + >>> + /* Peer fm came up so sending ee_id of this node */ >>> if (fm_cb->node_name.length != 0) >>> fms_fms_exchange_node_info(fm_cb); >>> >>> @@ -654,8 +657,9 @@ static void fm_mbx_msg_handler(FM_CB *fm >>> get_peer_clm_node_name(fm_mbx_evt->node_id); >>> } >>> break; >>> + >>> case FM_EVT_TMR_EXP: >>> -/* Timer Expiry event posted */ >>> + /* Timer Expiry event posted */ >>> if (fm_mbx_evt->info.fm_tmr->type == FM_TMR_PROMOTE_ACTIVE) { >>> /* Check whether node(AMF) initialization is done */ >>> if (fm_cb->csi_assigned == false) { >>> @@ -684,9 +688,11 @@ static void fm_mbx_msg_handler(FM_CB *fm >>> "within the time limit"); >>> } >>> break; >>> + >>> case FM_EVT_RDA_ROLE: >>> fm_evt_proc_rda_callback(fm_cb, fm_mbx_evt); >>> break; >>> + >>> default: >>> break; >>> } >>> diff --git a/src/fm/fmd/fm_mds.c b/src/fm/fmd/fm_mds.c >>> --- a/src/fm/fmd/fm_mds.c >>> +++ b/src/fm/fmd/fm_mds.c >>> @@ -1,6 +1,7 @@ >>> /* -*- OpenSAF -*- >>> * >>> * (C) Copyright 2008 The OpenSAF Foundation >>> +* Copyright (C) 2017, Oracle and/or its affiliates. All rights >>> reserved. 
>>> * >>> * This program is distributed in the hope that it will be useful, but >>> * WITHOUT ANY WARRANTY; without even the implied warranty of >>> MERCHANTABILITY >>> @@ -34,6 +35,7 @@ static void check_for_node_isolation(FM_ >>> static bool has_been_well_connected_recently(FM_CB *cb); >>> static uint32_t fm_mds_node_evt(FM_CB *cb, >>> MDS_CALLBACK_NODE_EVENT_INFO * node_evt); >>> static uint32_t fm_fill_mds_evt_post_fm_mbx(FM_CB *cb, FM_EVT >>> *fm_evt, NODE_ID node_id, FM_FSM_EVT_CODE evt_code); >>> +static void fm_proc_svc_down(FM_CB *cb, uint32_t node_id, >>> NCSMDS_SVC_ID svc_id); >>> >>> uint32_t >>> fm_mds_sync_send(FM_CB *fm_cb, NCSCONTEXT msg, >>> @@ -62,7 +64,7 @@ uint32_t fm_mds_init(FM_CB *cb) >>> { >>> NCSMDS_INFO arg; >>> MDS_SVC_ID svc_id[] = { NCSMDS_SVC_ID_GFM, NCSMDS_SVC_ID_AVND, >>> NCSMDS_SVC_ID_IMMND }; >>> - MDS_SVC_ID immd_id[2] = { NCSMDS_SVC_ID_IMMD, NCSMDS_SVC_ID_AVD }; >>> + MDS_SVC_ID svc_red_id[2] = { NCSMDS_SVC_ID_IMMD, >>> NCSMDS_SVC_ID_AVD }; >>> >>> /* Get the MDS handles to be used. 
*/ >>> if (fm_mds_get_adest_hdls(cb) != NCSCC_RC_SUCCESS) { >>> @@ -111,7 +113,7 @@ uint32_t fm_mds_init(FM_CB *cb) >>> arg.i_op = MDS_RED_SUBSCRIBE; >>> arg.info.svc_subscribe.i_num_svcs = 2; >>> arg.info.svc_subscribe.i_scope = NCSMDS_SCOPE_NONE; >>> - arg.info.svc_subscribe.i_svc_ids = immd_id; >>> + arg.info.svc_subscribe.i_svc_ids = svc_red_id; >>> if (ncsmds_api(&arg) == NCSCC_RC_FAILURE) { >>> syslog(LOG_ERR, "MDS_RED_SUBSCRIBE failed"); >>> arg.i_op = MDS_UNINSTALL; >>> @@ -285,25 +287,56 @@ uint32_t fm_send_node_down_to_mbx(FM_CB >>> return rc; >>> } >>> >>> -static void fm_send_svc_down_to_mbx(FM_CB *cb, uint32_t node_id, >>> NCSMDS_SVC_ID svc_id) >>> +void fm_proc_svc_down(FM_CB *cb, uint32_t node_id, NCSMDS_SVC_ID >>> svc_id) >>> { >>> - FM_EVT *fm_evt = NULL; >>> - uint32_t rc = NCSCC_RC_SUCCESS; >>> - fm_evt = m_MMGR_ALLOC_FM_EVT; >>> - if (NULL == fm_evt) { >>> - syslog(LOG_INFO, "fm_mds_rcv_evt: fm_evt allocation FAILED."); >>> - return; >>> + TRACE_ENTER2("SVC ID: %d", (int) svc_id); >>> + switch (svc_id) { >>> + case NCSMDS_SVC_ID_IMMND: >>> + cb->immnd_down = true; >>> + LOG_NO("IMMND down on: %x", cb->peer_node_id); >>> + break; >>> + case NCSMDS_SVC_ID_AVND: >>> + cb->amfnd_down = true; >>> + LOG_NO("AMFND down on: %x", cb->peer_node_id); >>> + break; >>> + case NCSMDS_SVC_ID_IMMD: >>> + cb->immd_down = true; >>> + LOG_NO("IMMD down on: %x", cb->peer_node_id); >>> + break; >>> + case NCSMDS_SVC_ID_AVD: >>> + cb->amfd_down = true; >>> + LOG_NO("AVD down on: %x", cb->peer_node_id); >>> + break; >>> + case NCSMDS_SVC_ID_GFM: >>> + cb->fm_down = true; >>> + LOG_NO("FM down on: %x", cb->peer_node_id); >>> + break; >>> + default: >>> + break; >>> } >>> - fm_evt->svc_id = svc_id; >>> - rc = fm_fill_mds_evt_post_fm_mbx(cb, fm_evt, node_id, >>> FM_EVT_SVC_DOWN); >>> - if (rc == NCSCC_RC_FAILURE) { >>> - m_MMGR_FREE_FM_EVT(fm_evt); >>> - LOG_IN("service down event post to mailbox failed"); >>> - fm_evt = NULL; >>> + >>> + /* Processing only for alternate 
node. >>> + * Service downs of AMFND, IMMD, IMMND is the same as NODE_DOWN >>> from 4.4 onwards. >>> + * This is required to handle the usecase involving >>> + * '/etc/init.d/opensafd stop' without an OS reboot cycle >>> + * Process service downs only if OpenSAF is not controlling TIPC. >>> + * If OpenSAF is controlling TIPC, just wait for NODE_DOWN to >>> trigger failover. >>> + */ >>> + if (cb->immd_down && cb->immnd_down && cb->amfnd_down && >>> cb->amfd_down && cb->fm_down) { >>> + LOG_NO("Core services went down on node_id: %x", node_id); >>> + if (cb->peer_down_await) { >>> + cb->peer_down_await = 0; >>> + m_NCS_SEL_OBJ_IND(&cb->peer_down_obj); >>> + } >>> + >>> + if(!cb->control_tipc) >>> + fm_send_node_down_to_mbx(cb, node_id); >>> } >>> - return; >>> + >>> + TRACE_LEAVE(); >>> } >>> >>> + >>> static void check_for_node_isolation(FM_CB *cb) >>> { >>> bool well_connected = cb->peer_sc_up && cb->cluster_size >= 3; >>> @@ -393,8 +426,7 @@ static uint32_t fm_mds_node_evt(FM_CB *c >>> >>> *****************************************************************************/ >>> >>> static uint32_t fm_mds_svc_evt(FM_CB *cb, >>> MDS_CALLBACK_SVC_EVENT_INFO *svc_evt) >>> { >>> - uint32_t return_val = NCSCC_RC_SUCCESS; >>> - FM_EVT *fm_evt; >>> + FM_EVT *fm_evt = NULL; >>> TRACE_ENTER(); >>> >>> if (NULL == svc_evt) { >>> @@ -413,43 +445,29 @@ static uint32_t fm_mds_svc_evt(FM_CB *cb >>> cb->peer_sc_up = false; >>> check_for_node_isolation(cb); >>> cb->peer_adest = 0; >>> - if (!cb->control_tipc) { >>> - fm_send_svc_down_to_mbx(cb, >>> svc_evt->i_node_id, svc_evt->i_svc_id); >>> - } >>> + >>> + fm_proc_svc_down(cb, svc_evt->i_node_id, >>> svc_evt->i_svc_id); >>> } >>> break; >>> case NCSMDS_SVC_ID_IMMND: >>> - if (svc_evt->i_node_id == cb->peer_node_id >>> - && !cb->control_tipc) { >>> - fm_send_svc_down_to_mbx(cb, svc_evt->i_node_id, >>> svc_evt->i_svc_id); >>> - } >>> - break; >>> case NCSMDS_SVC_ID_AVND: >>> - if (svc_evt->i_node_id == cb->peer_node_id >>> - && 
!cb->control_tipc) { >>> - fm_send_svc_down_to_mbx(cb, svc_evt->i_node_id, >>> svc_evt->i_svc_id); >>> + if (svc_evt->i_node_id == cb->peer_node_id) { >>> + fm_proc_svc_down(cb, svc_evt->i_node_id, >>> svc_evt->i_svc_id); >>> } >>> break; >>> default: >>> TRACE("Not interested in service down of other >>> services"); >>> break; >>> } >>> - >>> break; >>> >>> case NCSMDS_RED_DOWN: >>> switch (svc_evt->i_svc_id) { >>> /* Depend on service downs if OpenSAF is not controling >>> TIPC */ >>> case NCSMDS_SVC_ID_IMMD: >>> - if (svc_evt->i_node_id == cb->peer_node_id >>> - && !cb->control_tipc) { >>> - fm_send_svc_down_to_mbx(cb, svc_evt->i_node_id, >>> svc_evt->i_svc_id); >>> - } >>> - break; >>> case NCSMDS_SVC_ID_AVD: >>> - if (svc_evt->i_node_id == cb->peer_node_id >>> - && !cb->control_tipc) { >>> - fm_send_svc_down_to_mbx(cb, svc_evt->i_node_id, >>> svc_evt->i_svc_id); >>> + if (svc_evt->i_node_id == cb->peer_node_id) { >>> + fm_proc_svc_down(cb, svc_evt->i_node_id, >>> svc_evt->i_svc_id); >>> } >>> break; >>> default: >>> @@ -465,43 +483,96 @@ static uint32_t fm_mds_svc_evt(FM_CB *cb >>> TRACE("Peer fm status change: %d -> %d, peer node id >>> is: %x, cluster size is %llu", >>> (int) cb->peer_sc_up, 1, svc_evt->i_node_id, >>> (unsigned long long) cb->cluster_size); >>> cb->peer_sc_up = true; >>> + cb->fm_down = false; >>> check_for_node_isolation(cb); >>> >>> fm_evt = m_MMGR_ALLOC_FM_EVT; >>> - if (NULL == fm_evt) { >>> - syslog(LOG_INFO, "fm_mds_svc_evt: fm_evt >>> allocation FAILED."); >>> - return NCSCC_RC_FAILURE; >>> - } >>> + if (NULL == fm_evt) { >>> + syslog(LOG_INFO, "fm_mds_svc_evt: fm_evt >>> allocation FAILED."); >>> + return NCSCC_RC_FAILURE; >>> + } >>> + >>> cb->peer_adest = svc_evt->i_dest; >>> cb->peer_node_id = svc_evt->i_node_id; >>> cb->peer_node_terminated = false; >>> - return_val = fm_fill_mds_evt_post_fm_mbx(cb, fm_evt, >>> cb->peer_node_id, FM_EVT_PEER_UP); >>> >>> - if (NCSCC_RC_FAILURE == return_val) { >>> - m_MMGR_FREE_FM_EVT(fm_evt); >>> 
- fm_evt = NULL; >>> - } >>> + if(fm_fill_mds_evt_post_fm_mbx(cb, fm_evt, >>> cb->peer_node_id, FM_EVT_PEER_UP) == NCSCC_RC_FAILURE) >>> + { >>> + m_MMGR_FREE_FM_EVT(fm_evt); >>> + fm_evt = NULL; >>> + } >>> } >>> break; >>> + >>> case NCSMDS_SVC_ID_IMMND: >>> - if (svc_evt->i_node_id == cb->peer_node_id >>> - && !cb->control_tipc) >>> - cb->immnd_down = false; /* Only IMMND is >>> restartable */ >>> + if (svc_evt->i_node_id == cb->peer_node_id){ >>> + TRACE("Peer immnd status change: %d -> %d, peer node >>> id is: %x, cluster size is %llu", >>> + (int) cb->peer_sc_up, 1, svc_evt->i_node_id, >>> (unsigned long long) cb->cluster_size); >>> + cb->immnd_down = false; >>> + } >>> + break; >>> + >>> + case NCSMDS_SVC_ID_AVND: >>> + if (svc_evt->i_node_id == cb->peer_node_id){ >>> + TRACE("Peer amfnd status change: %d -> %d, peer node >>> id is: %x, cluster size is %llu", >>> + (int) cb->peer_sc_up, 1, svc_evt->i_node_id, >>> (unsigned long long) cb->cluster_size); >>> + cb->amfnd_down = false; >>> + } >>> break; >>> default: >>> break; >>> } >>> break; >>> >>> + case NCSMDS_RED_UP: >>> + switch (svc_evt->i_svc_id) { >>> + /* Depend on service downs if OpenSAF is not controling TIPC */ >>> + case NCSMDS_SVC_ID_IMMD: >>> + if (svc_evt->i_node_id != cb->node_id) { >>> + TRACE("Peer immd status change: %d -> %d, peer node >>> id is: %x, cluster size is %llu", >>> + (int) cb->peer_sc_up, 1, svc_evt->i_node_id, >>> (unsigned long long) cb->cluster_size); >>> + cb->peer_node_id = svc_evt->i_node_id; >>> + cb->immd_down = false; >>> + >>> + /* Arrived svc up event of amfd/amfnd/immd/immnd >>> svc's with out fm svc-up event being arrived. >>> + * It can be due to peer node is going down but not >>> fully down. hence reboot the node. 
>>> + */ >>> + if (!fm_cb->peer_sc_up) >>> + opensaf_reboot(0, NULL, "Peer is not completely >>> DOWN, Received svc up of peer IMMD"); >>> + } >>> + break; >>> + >>> + case NCSMDS_SVC_ID_AVD: >>> + if (svc_evt->i_node_id != cb->node_id) { >>> + TRACE("Peer amfd status change: %d -> %d, peer node >>> id is: %x, cluster size is %llu", >>> + (int) cb->peer_sc_up, 1, svc_evt->i_node_id, >>> (unsigned long long) cb->cluster_size); >>> + cb->peer_node_id = svc_evt->i_node_id; >>> + cb->amfd_down = false; >>> + >>> + /* Arrived svc up event of amfd/amfnd/immd/immnd >>> svc's with out fm svc-up event being arrived. >>> + * It can be due to peer node is going down but not >>> fully down. hence reboot the node. >>> + */ >>> + if (!fm_cb->peer_sc_up) >>> + opensaf_reboot(0, NULL, "Peer is not completely >>> DOWN, Received svc up of peer AMFD"); >>> + } >>> + break; >>> + >>> + default: >>> + TRACE("Not interested in service down of other services"); >>> + break; >>> + } >>> + break; >>> + >>> default: >>> syslog(LOG_INFO, "Wrong MDS event"); >>> break; >>> } >>> >>> TRACE_LEAVE(); >>> - return return_val; >>> + return NCSCC_RC_SUCCESS; >>> } >>> >>> + >>> >>> /*************************************************************************** >>> >>> * Name : fm_mds_rcv_evt >>> * >>> > ------------------------------------------------------------------------------ Check out the vibrant tech community on one of the world's most engaging tech sites, SlashDot.org! http://sdm.link/slashdot _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel