Add a supervision timer so that the controller reboots if it cannot obtain the consensus lock within the allocation period (2 * FMS_TAKEOVER_REQUEST_VALID_TIME).
The peer controller can then safely perform a node failover after this period of time. --- src/fm/fmd/fm_cb.h | 2 ++ src/fm/fmd/fm_main.cc | 14 ++++++++- src/fm/fmd/fm_rda.cc | 78 +++++++++++++++++++++++++++++++++++---------------- 3 files changed, 69 insertions(+), 25 deletions(-) diff --git a/src/fm/fmd/fm_cb.h b/src/fm/fmd/fm_cb.h index 6eb0d54..b5ea5ae 100644 --- a/src/fm/fmd/fm_cb.h +++ b/src/fm/fmd/fm_cb.h @@ -39,6 +39,7 @@ typedef enum { FM_TMR_TYPE_MIN, FM_TMR_PROMOTE_ACTIVE, FM_TMR_ACTIVATION_SUPERVISION, + FM_TMR_CONSENSUS_SERVICE_SUPERVISION, FM_TMR_TYPE_MAX } FM_TMR_TYPE; @@ -83,6 +84,7 @@ struct FM_CB { /* Timers */ FM_TMR promote_active_tmr{}; FM_TMR activation_supervision_tmr{}; + FM_TMR consensus_service_supervision_tmr{}; /* Time in terms of one hundredth of seconds (500 for 5 secs.) */ uint32_t active_promote_tmr_val{}; diff --git a/src/fm/fmd/fm_main.cc b/src/fm/fmd/fm_main.cc index 2eb3c16..4a843cc 100644 --- a/src/fm/fmd/fm_main.cc +++ b/src/fm/fmd/fm_main.cc @@ -59,7 +59,8 @@ static uint32_t fm_get_args(FM_CB *); static uint32_t fms_fms_exchange_node_info(FM_CB *); static uint32_t fms_fms_inform_terminating(FM_CB *fm_cb); static uint32_t fm_nid_notify(uint32_t); -static uint32_t fm_tmr_start(FM_TMR *, SaTimeT); +uint32_t fm_tmr_start(FM_TMR *, SaTimeT); +void fm_tmr_stop(FM_TMR *tmr); static SaAisErrorT get_peer_clm_node_name(NODE_ID); static SaAisErrorT fm_clm_init(); static void fm_mbx_msg_handler(FM_CB *, FM_EVT *); @@ -449,6 +450,8 @@ static uint32_t fm_get_args(FM_CB *fm_cb) { /* Set timer variables */ fm_cb->promote_active_tmr.type = FM_TMR_PROMOTE_ACTIVE; fm_cb->activation_supervision_tmr.type = FM_TMR_ACTIVATION_SUPERVISION; + fm_cb->consensus_service_supervision_tmr.type = + FM_TMR_CONSENSUS_SERVICE_SUPERVISION; char *node_isolation_timeout = getenv("FMS_NODE_ISOLATION_TIMEOUT"); if (node_isolation_timeout != NULL) { @@ -704,6 +707,11 @@ static void fm_mbx_msg_handler(FM_CB *fm_cb, FM_EVT *fm_mbx_evt) { "Activation timer supervision " 
"expired: no ACTIVE assignment received " "within the time limit"); + } else if (fm_mbx_evt->info.fm_tmr->type == + FM_TMR_CONSENSUS_SERVICE_SUPERVISION) { + opensaf_quick_reboot("Consensus service supervision " + "expired: controller was not promoted " + "within the time limit"); } break; @@ -728,6 +736,10 @@ static void fm_evt_proc_rda_callback(FM_CB *cb, FM_EVT *evt) { uint32_t rc = NCSCC_RC_SUCCESS; TRACE_ENTER2("%d", (int)evt->info.rda_info.role); + if (evt->info.rda_info.role == PCS_RDA_ACTIVE) { + LOG_NO("Controller promoted. Stop supervision timer"); + fm_tmr_stop(&fm_cb->consensus_service_supervision_tmr); + } if (evt->info.rda_info.role != PCS_RDA_ACTIVE && cb->activation_supervision_tmr.status == FM_TMR_RUNNING) { fm_tmr_stop(&cb->activation_supervision_tmr); diff --git a/src/fm/fmd/fm_rda.cc b/src/fm/fmd/fm_rda.cc index d3063ba..0544152 100644 --- a/src/fm/fmd/fm_rda.cc +++ b/src/fm/fmd/fm_rda.cc @@ -23,6 +23,8 @@ #include "osaf/consensus/consensus.h" #include "rde/agent/rda_papi.h" +extern uint32_t fm_tmr_start(FM_TMR *tmr, SaTimeT period); +extern void fm_tmr_stop(FM_TMR *tmr); extern void rda_cb(uint32_t cb_hdl, PCS_RDA_CB_INFO *cb_info, PCSRDA_RETURN_CODE error_code); /**************************************************************************** @@ -64,6 +66,47 @@ done: return rc; } +void promote_node(FM_CB *fm_cb) { + TRACE_ENTER(); + + Consensus consensus_service; + if (consensus_service.PrioritisePartitionSize() == true) { + // Allow topology events to be processed first. The MDS thread may + // be processing MDS down events and updating cluster_size concurrently. + // We need cluster_size to be as accurate as possible, without waiting + // too long for node down events. 
+ std::this_thread::sleep_for(std::chrono::seconds(2)); + } + + uint32_t rc; + rc = consensus_service.PromoteThisNode(true, fm_cb->cluster_size); + if (rc != SA_AIS_OK && rc != SA_AIS_ERR_EXIST) { + LOG_ER("Unable to set active controller in consensus service"); + opensaf_quick_reboot("Unable to set active controller " + "in consensus service"); + } else if (rc == SA_AIS_ERR_EXIST) { + // @todo if we don't reboot, we don't seem to recover from this. Can we + // improve? + LOG_ER( + "A controller is already active. We were separated from the " + "cluster?"); + opensaf_quick_reboot("A controller is already active. We were separated " + "from the cluster?"); + } + + PCS_RDA_REQ rda_req; + + /* set the RDA role to active */ + memset(&rda_req, 0, sizeof(PCS_RDA_REQ)); + rda_req.req_type = PCS_RDA_SET_ROLE; + rda_req.info.io_role = PCS_RDA_ACTIVE; + + rc = pcs_rda_request(&rda_req); + if (rc != PCSRDA_RC_SUCCESS) { + LOG_ER("pcs_rda_request() failed)"); + } +} + /**************************************************************************** * Name : fm_rda_set_role * @@ -88,30 +131,17 @@ uint32_t fm_rda_set_role(FM_CB *fm_cb, PCS_RDA_ROLE role) { Consensus consensus_service; if (consensus_service.IsEnabled() == true) { - if (consensus_service.PrioritisePartitionSize() == true) { - // Allow topology events to be processed first. The MDS thread may - // be processing MDS down events and updating cluster_size concurrently. - // We need cluster_size to be as accurate as possible, without waiting - // too long for node down events. 
- std::this_thread::sleep_for(std::chrono::seconds(4)); - } - - rc = consensus_service.PromoteThisNode(true, fm_cb->cluster_size); - if (rc != SA_AIS_OK && rc != SA_AIS_ERR_EXIST) { - LOG_ER("Unable to set active controller in consensus service"); - opensaf_quick_reboot("Unable to set active controller " - "in consensus service"); - return NCSCC_RC_FAILURE; - } else if (rc == SA_AIS_ERR_EXIST) { - // @todo if we don't reboot, we don't seem to recover from this. Can we - // improve? - LOG_ER( - "A controller is already active. We were separated from the " - "cluster?"); - opensaf_quick_reboot("A controller is already active. We were separated " - "from the cluster?"); - return NCSCC_RC_FAILURE; - } + // Start supervision timer, make sure we obtain lock within + // 2* FMS_TAKEOVER_REQUEST_VALID_TIME, otherwise reboot the node. + // This is needed in case we are in a split network situation + // the current active will fail-over work running on this node. + LOG_NO("Starting consensus service supervision: %u s", + consensus_service.TakeoverValidTime()); + fm_tmr_start(&fm_cb->consensus_service_supervision_tmr, + 200 * consensus_service.TakeoverValidTime()); + + std::thread(&promote_node, fm_cb).detach(); + return NCSCC_RC_SUCCESS; } rc = pcs_rda_request(&rda_req); -- 2.7.4 _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel