Add a supervision timer so that a controller reboots if it cannot obtain
the consensus lock within the allotted period
(2 * FMS_TAKEOVER_REQUEST_VALID_TIME).

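Once this period has elapsed, the peer controller can safely perform a
node failover.

Promotion in the consensus service now runs in a detached thread, so
fm_rda_set_role() returns immediately after starting the timer. The wait
for topology events before promotion is reduced from 4 to 2 seconds.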
---
 src/fm/fmd/fm_cb.h    |  2 ++
 src/fm/fmd/fm_main.cc | 14 ++++++++-
 src/fm/fmd/fm_rda.cc  | 78 +++++++++++++++++++++++++++++++++++----------------
 3 files changed, 69 insertions(+), 25 deletions(-)

diff --git a/src/fm/fmd/fm_cb.h b/src/fm/fmd/fm_cb.h
index 6eb0d54..b5ea5ae 100644
--- a/src/fm/fmd/fm_cb.h
+++ b/src/fm/fmd/fm_cb.h
@@ -39,6 +39,7 @@ typedef enum {
   FM_TMR_TYPE_MIN,
   FM_TMR_PROMOTE_ACTIVE,
   FM_TMR_ACTIVATION_SUPERVISION,
+  FM_TMR_CONSENSUS_SERVICE_SUPERVISION,
   FM_TMR_TYPE_MAX
 } FM_TMR_TYPE;
 
@@ -83,6 +84,7 @@ struct FM_CB {
   /* Timers */
   FM_TMR promote_active_tmr{};
   FM_TMR activation_supervision_tmr{};
+  FM_TMR consensus_service_supervision_tmr{};
 
   /* Time in terms of one hundredth of seconds (500 for 5 secs.) */
   uint32_t active_promote_tmr_val{};
diff --git a/src/fm/fmd/fm_main.cc b/src/fm/fmd/fm_main.cc
index 2eb3c16..4a843cc 100644
--- a/src/fm/fmd/fm_main.cc
+++ b/src/fm/fmd/fm_main.cc
@@ -59,7 +59,8 @@ static uint32_t fm_get_args(FM_CB *);
 static uint32_t fms_fms_exchange_node_info(FM_CB *);
 static uint32_t fms_fms_inform_terminating(FM_CB *fm_cb);
 static uint32_t fm_nid_notify(uint32_t);
-static uint32_t fm_tmr_start(FM_TMR *, SaTimeT);
+uint32_t fm_tmr_start(FM_TMR *, SaTimeT);
+void fm_tmr_stop(FM_TMR *tmr);
 static SaAisErrorT get_peer_clm_node_name(NODE_ID);
 static SaAisErrorT fm_clm_init();
 static void fm_mbx_msg_handler(FM_CB *, FM_EVT *);
@@ -449,6 +450,8 @@ static uint32_t fm_get_args(FM_CB *fm_cb) {
   /* Set timer variables */
   fm_cb->promote_active_tmr.type = FM_TMR_PROMOTE_ACTIVE;
   fm_cb->activation_supervision_tmr.type = FM_TMR_ACTIVATION_SUPERVISION;
+  fm_cb->consensus_service_supervision_tmr.type =
+    FM_TMR_CONSENSUS_SERVICE_SUPERVISION;
 
   char *node_isolation_timeout = getenv("FMS_NODE_ISOLATION_TIMEOUT");
   if (node_isolation_timeout != NULL) {
@@ -704,6 +707,11 @@ static void fm_mbx_msg_handler(FM_CB *fm_cb, FM_EVT *fm_mbx_evt) {
                        "Activation timer supervision "
                        "expired: no ACTIVE assignment received "
                        "within the time limit");
+      } else if (fm_mbx_evt->info.fm_tmr->type ==
+                 FM_TMR_CONSENSUS_SERVICE_SUPERVISION) {
+        opensaf_quick_reboot("Consensus service supervision "
+                             "expired: controller was not promoted "
+                             "within the time limit");
       }
       break;
 
@@ -728,6 +736,10 @@ static void fm_evt_proc_rda_callback(FM_CB *cb, FM_EVT *evt) {
   uint32_t rc = NCSCC_RC_SUCCESS;
 
   TRACE_ENTER2("%d", (int)evt->info.rda_info.role);
+  if (evt->info.rda_info.role == PCS_RDA_ACTIVE) {
+    LOG_NO("Controller promoted. Stop supervision timer");
+    fm_tmr_stop(&fm_cb->consensus_service_supervision_tmr);
+  }
   if (evt->info.rda_info.role != PCS_RDA_ACTIVE &&
       cb->activation_supervision_tmr.status == FM_TMR_RUNNING) {
     fm_tmr_stop(&cb->activation_supervision_tmr);
diff --git a/src/fm/fmd/fm_rda.cc b/src/fm/fmd/fm_rda.cc
index d3063ba..0544152 100644
--- a/src/fm/fmd/fm_rda.cc
+++ b/src/fm/fmd/fm_rda.cc
@@ -23,6 +23,8 @@
 #include "osaf/consensus/consensus.h"
 #include "rde/agent/rda_papi.h"
 
+extern uint32_t fm_tmr_start(FM_TMR *tmr, SaTimeT period);
+extern void fm_tmr_stop(FM_TMR *tmr);
 extern void rda_cb(uint32_t cb_hdl, PCS_RDA_CB_INFO *cb_info,
                    PCSRDA_RETURN_CODE error_code);
 /****************************************************************************
@@ -64,6 +66,47 @@ done:
   return rc;
 }
 
+// Promote this node in the consensus service, then request the RDA ACTIVE
+// role. Requests a quick reboot if the consensus service refuses promotion.
+void promote_node(FM_CB *fm_cb) {
+  TRACE_ENTER();
+  Consensus consensus_service;
+  if (consensus_service.PrioritisePartitionSize() == true) {
+    // Allow topology events to be processed first. The MDS thread may
+    // be processing MDS down events and updating cluster_size concurrently.
+    // We need cluster_size to be as accurate as possible, without waiting
+    // too long for node down events.
+    std::this_thread::sleep_for(std::chrono::seconds(2));
+  }
+
+  uint32_t rc = consensus_service.PromoteThisNode(true, fm_cb->cluster_size);
+  if (rc != SA_AIS_OK && rc != SA_AIS_ERR_EXIST) {
+    LOG_ER("Unable to set active controller in consensus service");
+    opensaf_quick_reboot("Unable to set active controller "
+      "in consensus service");
+    return;  // if the reboot call returns, never go ACTIVE without the lock
+  } else if (rc == SA_AIS_ERR_EXIST) {
+    // @todo if we don't reboot, we don't seem to recover from this. Can we
+    // improve?
+    LOG_ER("A controller is already active. We were separated from the "
+           "cluster?");
+    opensaf_quick_reboot("A controller is already active. We were separated "
+                         "from the cluster?");
+    return;  // same guard: another controller is active, do not set role
+  }
+
+  /* set the RDA role to active */
+  PCS_RDA_REQ rda_req;
+  memset(&rda_req, 0, sizeof(PCS_RDA_REQ));
+  rda_req.req_type = PCS_RDA_SET_ROLE;
+  rda_req.info.io_role = PCS_RDA_ACTIVE;
+
+  rc = pcs_rda_request(&rda_req);
+  if (rc != PCSRDA_RC_SUCCESS) {
+    LOG_ER("pcs_rda_request() failed");
+  }
+}
+
 /****************************************************************************
  * Name          : fm_rda_set_role
  *
@@ -88,30 +131,17 @@ uint32_t fm_rda_set_role(FM_CB *fm_cb, PCS_RDA_ROLE role) {
 
   Consensus consensus_service;
   if (consensus_service.IsEnabled() == true) {
-    if (consensus_service.PrioritisePartitionSize() == true) {
-      // Allow topology events to be processed first. The MDS thread may
-      // be processing MDS down events and updating cluster_size concurrently.
-      // We need cluster_size to be as accurate as possible, without waiting
-      // too long for node down events.
-      std::this_thread::sleep_for(std::chrono::seconds(4));
-    }
-
-    rc = consensus_service.PromoteThisNode(true, fm_cb->cluster_size);
-    if (rc != SA_AIS_OK && rc != SA_AIS_ERR_EXIST) {
-      LOG_ER("Unable to set active controller in consensus service");
-      opensaf_quick_reboot("Unable to set active controller "
-          "in consensus service");
-      return NCSCC_RC_FAILURE;
-    } else if (rc == SA_AIS_ERR_EXIST) {
-      // @todo if we don't reboot, we don't seem to recover from this. Can we
-      // improve?
-      LOG_ER(
-          "A controller is already active. We were separated from the "
-          "cluster?");
-      opensaf_quick_reboot("A controller is already active. We were separated "
-                           "from the cluster?");
-      return NCSCC_RC_FAILURE;
-    }
+    // Start supervision timer, make sure we obtain lock within
+    // 2* FMS_TAKEOVER_REQUEST_VALID_TIME, otherwise reboot the node.
+    // This is needed in case we are in a split network situation
+    // the current active will fail-over work running on this node.
+    LOG_NO("Starting consensus service supervision: %u s",
+           consensus_service.TakeoverValidTime());
+    fm_tmr_start(&fm_cb->consensus_service_supervision_tmr,
+                 200 * consensus_service.TakeoverValidTime());
+
+    std::thread(&promote_node, fm_cb).detach();
+    return NCSCC_RC_SUCCESS;
   }
 
   rc = pcs_rda_request(&rda_req);
-- 
2.7.4



_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to