Hi Gary

 

I have 2 comments bellow.

 

Regards

Canh

 

-----Original Message-----
From: Gary Lee <gary....@dektech.com.au> 
Sent: Wednesday, July 3, 2019 1:27 PM
To: hans.nordeb...@ericsson.com; minh.c...@dektech.com.au
Cc: opensaf-devel@lists.sourceforge.net
Subject: [devel] [PATCH 2/3] fmd: add active promotion supervision timer
[#3029]

 

Add supervision timer so controller will reboot if it cannot obtain

consensus lock within the allocation period

(2* FMS_TAKEOVER_REQUEST_VALID_TIME).

 

The peer controller can then safely perform a node failover

after this period of time.

---

src/fm/fmd/fm_cb.h    |  2 ++

src/fm/fmd/fm_main.cc | 14 ++++++++-

src/fm/fmd/fm_rda.cc  | 78
+++++++++++++++++++++++++++++++++++----------------

3 files changed, 69 insertions(+), 25 deletions(-)

 

diff --git a/src/fm/fmd/fm_cb.h b/src/fm/fmd/fm_cb.h

index 6eb0d54..b5ea5ae 100644

--- a/src/fm/fmd/fm_cb.h

+++ b/src/fm/fmd/fm_cb.h

@@ -39,6 +39,7 @@ typedef enum {

   FM_TMR_TYPE_MIN,

   FM_TMR_PROMOTE_ACTIVE,

   FM_TMR_ACTIVATION_SUPERVISION,

+  FM_TMR_CONSENSUS_SERVICE_SUPERVISION,

   FM_TMR_TYPE_MAX

} FM_TMR_TYPE;

@@ -83,6 +84,7 @@ struct FM_CB {

   /* Timers */

   FM_TMR promote_active_tmr{};

   FM_TMR activation_supervision_tmr{};

+  FM_TMR consensus_service_supervision_tmr{};

   /* Time in terms of one hundredth of seconds (500 for 5 secs.) */

   uint32_t active_promote_tmr_val{};

diff --git a/src/fm/fmd/fm_main.cc b/src/fm/fmd/fm_main.cc

index 2eb3c16..4a843cc 100644

--- a/src/fm/fmd/fm_main.cc

+++ b/src/fm/fmd/fm_main.cc

@@ -59,7 +59,8 @@ static uint32_t fm_get_args(FM_CB *);

static uint32_t fms_fms_exchange_node_info(FM_CB *);

static uint32_t fms_fms_inform_terminating(FM_CB *fm_cb);

static uint32_t fm_nid_notify(uint32_t);

-static uint32_t fm_tmr_start(FM_TMR *, SaTimeT);

+uint32_t fm_tmr_start(FM_TMR *, SaTimeT);

+void fm_tmr_stop(FM_TMR *tmr);

static SaAisErrorT get_peer_clm_node_name(NODE_ID);

static SaAisErrorT fm_clm_init();

static void fm_mbx_msg_handler(FM_CB *, FM_EVT *);

@@ -449,6 +450,8 @@ static uint32_t fm_get_args(FM_CB *fm_cb) {

   /* Set timer variables */

   fm_cb->promote_active_tmr.type = FM_TMR_PROMOTE_ACTIVE;

   fm_cb->activation_supervision_tmr.type = FM_TMR_ACTIVATION_SUPERVISION;

+  fm_cb->consensus_service_supervision_tmr.type =

+    FM_TMR_CONSENSUS_SERVICE_SUPERVISION;

   char *node_isolation_timeout = getenv("FMS_NODE_ISOLATION_TIMEOUT");

   if (node_isolation_timeout != NULL) {

@@ -704,6 +707,11 @@ static void fm_mbx_msg_handler(FM_CB *fm_cb, FM_EVT
*fm_mbx_evt) {

                        "Activation timer supervision "

                        "expired: no ACTIVE assignment received "

                        "within the time limit");

+      } else if (fm_mbx_evt->info.fm_tmr->type ==

+                 FM_TMR_CONSENSUS_SERVICE_SUPERVISION) {

+        opensaf_quick_reboot("Consensus service supervision "

+                             "expired: controller was not promoted "

+                             "within the time limit");

       }

       break;

@@ -728,6 +736,10 @@ static void fm_evt_proc_rda_callback(FM_CB *cb, FM_EVT
*evt) {

   uint32_t rc = NCSCC_RC_SUCCESS;

   TRACE_ENTER2("%d", (int)evt->info.rda_info.role);

+  if (evt->info.rda_info.role == PCS_RDA_ACTIVE) {

+    LOG_NO("Controller promoted. Stop supervision timer");

+    fm_tmr_stop(&fm_cb->consensus_service_supervision_tmr);

+  }

   if (evt->info.rda_info.role != PCS_RDA_ACTIVE &&

       cb->activation_supervision_tmr.status == FM_TMR_RUNNING) {

     fm_tmr_stop(&cb->activation_supervision_tmr);

diff --git a/src/fm/fmd/fm_rda.cc b/src/fm/fmd/fm_rda.cc

index d3063ba..0544152 100644

--- a/src/fm/fmd/fm_rda.cc

+++ b/src/fm/fmd/fm_rda.cc

@@ -23,6 +23,8 @@

#include "osaf/consensus/consensus.h"

#include "rde/agent/rda_papi.h"

+extern uint32_t fm_tmr_start(FM_TMR *tmr, SaTimeT period);

+extern void fm_tmr_stop(FM_TMR *tmr);

extern void rda_cb(uint32_t cb_hdl, PCS_RDA_CB_INFO *cb_info,

                    PCSRDA_RETURN_CODE error_code);

/***************************************************************************
*

@@ -64,6 +66,47 @@ done:

   return rc;

}

+void promote_node(FM_CB *fm_cb) {

+  TRACE_ENTER();

+

+  Consensus consensus_service;

+  if (consensus_service.PrioritisePartitionSize() == true) {

+    // Allow topology events to be processed first. The MDS thread may

+    // be processing MDS down events and updating cluster_size
concurrently.

+    // We need cluster_size to be as accurate as possible, without waiting

+    // too long for node down events.

+    std::this_thread::sleep_for(std::chrono::seconds(2));

[Canh] Could we replace hard code wait here by do the retries of promote ? 

I am thinking the time of promotion node will reduced in case topology event
is handled very fast.

I also see that you change the time wait from 4s -> 2s. As previously I saw
that the time may need to wait more than 3s sometimes.

So the promotion may be failed.

 

+  }

+

+  uint32_t rc;

+  rc = consensus_service.PromoteThisNode(true, fm_cb->cluster_size);

+  if (rc != SA_AIS_OK && rc != SA_AIS_ERR_EXIST) {

+    LOG_ER("Unable to set active controller in consensus service");

+    opensaf_quick_reboot("Unable to set active controller "

+      "in consensus service");

+  } else if (rc == SA_AIS_ERR_EXIST) {

+    // @todo if we don't reboot, we don't seem to recover from this. Can we

+    // improve?

+    LOG_ER(

+        "A controller is already active. We were separated from the "

+        "cluster?");

+    opensaf_quick_reboot("A controller is already active. We were separated
"

+                         "from the cluster?");

+  }

+

+  PCS_RDA_REQ rda_req;

+

+  /* set the RDA role to active */

+  memset(&rda_req, 0, sizeof(PCS_RDA_REQ));

+  rda_req.req_type = PCS_RDA_SET_ROLE;

+  rda_req.info.io_role = PCS_RDA_ACTIVE;

+

+  rc = pcs_rda_request(&rda_req);

+  if (rc != PCSRDA_RC_SUCCESS) {

+    LOG_ER("pcs_rda_request() failed)");

+  }

+}

+

/***************************************************************************
*

  * Name          : fm_rda_set_role

  *

@@ -88,30 +131,17 @@ uint32_t fm_rda_set_role(FM_CB *fm_cb, PCS_RDA_ROLE
role) {

   Consensus consensus_service;

   if (consensus_service.IsEnabled() == true) {

-    if (consensus_service.PrioritisePartitionSize() == true) {

-      // Allow topology events to be processed first. The MDS thread may

-      // be processing MDS down events and updating cluster_size
concurrently.

-      // We need cluster_size to be as accurate as possible, without
waiting

-      // too long for node down events.

-      std::this_thread::sleep_for(std::chrono::seconds(4));

-    }

-

-    rc = consensus_service.PromoteThisNode(true, fm_cb->cluster_size);

-    if (rc != SA_AIS_OK && rc != SA_AIS_ERR_EXIST) {

-      LOG_ER("Unable to set active controller in consensus service");

-      opensaf_quick_reboot("Unable to set active controller "

-          "in consensus service");

-      return NCSCC_RC_FAILURE;

-    } else if (rc == SA_AIS_ERR_EXIST) {

-      // @todo if we don't reboot, we don't seem to recover from this. Can
we

-      // improve?

-      LOG_ER(

-          "A controller is already active. We were separated from the "

-          "cluster?");

-      opensaf_quick_reboot("A controller is already active. We were
separated "

-                           "from the cluster?");

-      return NCSCC_RC_FAILURE;

-    }

+    // Start supervision timer, make sure we obtain lock within

+    // 2* FMS_TAKEOVER_REQUEST_VALID_TIME, otherwise reboot the node.

+    // This is needed in case we are in a split network situation

+    // the current active will fail-over work running on this node.

+    LOG_NO("Starting consensus service supervision: %u s",

+           consensus_service.TakeoverValidTime());

+    fm_tmr_start(&fm_cb->consensus_service_supervision_tmr,

+                 200 * consensus_service.TakeoverValidTime());

+

+    std::thread(&promote_node, fm_cb).detach();

+    return NCSCC_RC_SUCCESS;

   }

[Canh] the setting value of variable "rda_req" should move to here ?

  memset(&rda_req, 0, sizeof(PCS_RDA_REQ));

  rda_req.req_type = PCS_RDA_SET_ROLE;

  rda_req.info.io_role = role;

In case using consensus this rda request is not sent here

 

   rc = pcs_rda_request(&rda_req);

-- 

2.7.4

 

 

 

_______________________________________________

Opensaf-devel mailing list

 <mailto:Opensaf-devel@lists.sourceforge.net>
Opensaf-devel@lists.sourceforge.net

 <https://lists.sourceforge.net/lists/listinfo/opensaf-devel>
https://lists.sourceforge.net/lists/listinfo/opensaf-devel


_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to