Currently amfd does not retry ClmTrackStart/Stop after a switchover if
the CLM APIs are still unavailable due to a heavily loaded system or a
connectivity issue. This patch adds a retry mechanism for the CLM APIs,
like the one that already exists for the IMM/NTF APIs.
---
src/amf/amfd/clm.cc | 79 +++++++++++++++++++++++++++++++++++++++++------
src/amf/amfd/clm.h | 27 ++++++++++++++--
src/amf/amfd/imm.cc | 28 +++++++++++++++++
src/amf/amfd/imm.h | 3 ++
src/amf/amfd/role.cc | 16 ++++------
src/amf/amfd/sg_2n_fsm.cc | 3 +-
src/amf/amfd/sgproc.cc | 3 +-
7 files changed, 135 insertions(+), 24 deletions(-)
diff --git a/src/amf/amfd/clm.cc b/src/amf/amfd/clm.cc
index d8342ca..2bcea2d 100644
--- a/src/amf/amfd/clm.cc
+++ b/src/amf/amfd/clm.cc
@@ -475,46 +475,63 @@ done:
return error;
}
-SaAisErrorT avd_clm_track_start(void) {
+SaAisErrorT avd_clm_track_start(AVD_CL_CB* cb) {
SaUint8T trackFlags = SA_TRACK_CURRENT | SA_TRACK_CHANGES_ONLY |
SA_TRACK_VALIDATE_STEP | SA_TRACK_START_STEP;
TRACE_ENTER();
- SaAisErrorT error =
- saClmClusterTrack_4(avd_cb->clmHandle, trackFlags, nullptr);
+ SaAisErrorT error = SA_AIS_OK;
+
+ if (cb->is_clm_track_started == true) {
+ // abort all pending and unsuccessful jobs that stop tracking
+ // because at this moment, amfd wants to start cluster tracking
+ Fifo::remove(cb, JOB_TYPE_CLM);
+ }
+
+ error = saClmClusterTrack_4(cb->clmHandle, trackFlags, nullptr);
if (error != SA_AIS_OK) {
if (error == SA_AIS_ERR_TRY_AGAIN || error == SA_AIS_ERR_TIMEOUT ||
error == SA_AIS_ERR_UNAVAILABLE) {
LOG_WA("Failed to start cluster tracking %u", error);
+ error = SA_AIS_ERR_TRY_AGAIN;
} else {
LOG_ER("Failed to start cluster tracking %u", error);
}
} else {
- avd_cb->is_clm_track_started = true;
+ cb->is_clm_track_started = true;
}
TRACE_LEAVE();
return error;
}
-SaAisErrorT avd_clm_track_stop(void) {
+SaAisErrorT avd_clm_track_stop(AVD_CL_CB* cb) {
TRACE_ENTER();
- SaAisErrorT error = saClmClusterTrackStop(avd_cb->clmHandle);
+ SaAisErrorT error = SA_AIS_OK;
+
+ if (cb->is_clm_track_started == false) {
+ // abort all pending and unsuccessful jobs that start tracking
+ // because at this moment, amfd wants to stop cluster tracking
+ Fifo::remove(cb, JOB_TYPE_CLM);
+ }
+
+ error = saClmClusterTrackStop(cb->clmHandle);
if (error != SA_AIS_OK) {
if (error == SA_AIS_ERR_TRY_AGAIN || error == SA_AIS_ERR_TIMEOUT ||
error == SA_AIS_ERR_UNAVAILABLE) {
LOG_WA("Failed to stop cluster tracking %u", error);
+ error = SA_AIS_ERR_TRY_AGAIN;
} else if (error == SA_AIS_ERR_NOT_EXIST) {
/* track changes was not started or stopped successfully */
LOG_WA("Failed to stop cluster tracking %u", error);
- avd_cb->is_clm_track_started = false;
+ cb->is_clm_track_started = false;
+ error = SA_AIS_OK;
} else {
LOG_ER("Failed to stop cluster tracking %u", error);
}
} else {
TRACE("Sucessfully stops cluster tracking");
- avd_cb->is_clm_track_started = false;
+ cb->is_clm_track_started = false;
}
-
TRACE_LEAVE();
return error;
}
@@ -550,7 +567,7 @@ static void *avd_clm_init_thread(void *arg) {
if (cb->avail_state_avd == SA_AMF_HA_ACTIVE) {
for (;;) {
- error = avd_clm_track_start();
+ error = avd_clm_track_start(cb);
if (error == SA_AIS_ERR_TRY_AGAIN || error == SA_AIS_ERR_TIMEOUT ||
error == SA_AIS_ERR_UNAVAILABLE) {
osaf_nanosleep(&kHundredMilliseconds);
@@ -584,3 +601,45 @@ SaAisErrorT avd_start_clm_init_bg(void) {
pthread_attr_destroy(&attr);
return SA_AIS_OK;
}
+
+AvdJobDequeueResultT ClmTrackStart::exec(const AVD_CL_CB* cb) {
+ AvdJobDequeueResultT res;
+ TRACE_ENTER();
+
+ SaAisErrorT rc = avd_clm_track_start(const_cast<AVD_CL_CB*>(cb));
+ if (rc == SA_AIS_OK) {
+ delete Fifo::dequeue();
+ res = JOB_EXECUTED;
+ } else if (rc == SA_AIS_ERR_TRY_AGAIN) {
+ TRACE("TRY-AGAIN");
+ res = JOB_ETRYAGAIN;
+ } else {
+ delete Fifo::dequeue();
+ LOG_ER("%s: ClmTrackStart FAILED %u", __FUNCTION__, rc);
+ res = JOB_ERR;
+ }
+
+ TRACE_LEAVE();
+ return res;
+}
+
+AvdJobDequeueResultT ClmTrackStop::exec(const AVD_CL_CB* cb) {
+ AvdJobDequeueResultT res;
+ TRACE_ENTER();
+
+ SaAisErrorT rc = avd_clm_track_stop(const_cast<AVD_CL_CB*>(cb));
+ if (rc == SA_AIS_OK) {
+ delete Fifo::dequeue();
+ res = JOB_EXECUTED;
+ } else if (rc == SA_AIS_ERR_TRY_AGAIN) {
+ TRACE("TRY-AGAIN");
+ res = JOB_ETRYAGAIN;
+ } else {
+ delete Fifo::dequeue();
+ LOG_ER("%s: ClmTrackStop FAILED %u", __FUNCTION__, rc);
+ res = JOB_ERR;
+ }
+
+ TRACE_LEAVE();
+ return res;
+}
diff --git a/src/amf/amfd/clm.h b/src/amf/amfd/clm.h
index 6adf796..2bbe320 100644
--- a/src/amf/amfd/clm.h
+++ b/src/amf/amfd/clm.h
@@ -1,6 +1,7 @@
/* -*- OpenSAF -*-
*
* (C) Copyright 2010 The OpenSAF Foundation
+ * Copyright Ericsson AB 2017 - All Rights Reserved.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
@@ -16,6 +17,7 @@
*/
#include <saClm.h>
+#include "amf/amfd/imm.h"
#ifndef AMF_AMFD_CLM_H_
#define AMF_AMFD_CLM_H_
@@ -23,9 +25,30 @@
struct cl_cb_tag;
extern SaAisErrorT avd_clm_init(struct cl_cb_tag *);
-extern SaAisErrorT avd_clm_track_start(void);
-extern SaAisErrorT avd_clm_track_stop(void);
+extern SaAisErrorT avd_clm_track_start(struct cl_cb_tag *);
+extern SaAisErrorT avd_clm_track_stop(struct cl_cb_tag *);
extern void clm_node_terminate(AVD_AVND *node);
extern SaAisErrorT avd_start_clm_init_bg(void);
+class ClmJob : public Job {
+public:
+ ClmJob() {}
+ AvdJobTypeT getJobType() { return JOB_TYPE_CLM; }
+ bool isRunnable(const AVD_CL_CB *cb) { return true;}
+};
+
+class ClmTrackStart : public ClmJob {
+ public:
+ ClmTrackStart() : ClmJob(){};
+ AvdJobDequeueResultT exec(const struct cl_cb_tag *cb);
+ ~ClmTrackStart() {}
+};
+
+class ClmTrackStop : public ClmJob {
+ public:
+ ClmTrackStop() : ClmJob(){};
+ AvdJobDequeueResultT exec(const struct cl_cb_tag *cb);
+ ~ClmTrackStop() {}
+};
+
#endif // AMF_AMFD_CLM_H_
diff --git a/src/amf/amfd/imm.cc b/src/amf/amfd/imm.cc
index baf646c..bf7e3d3 100644
--- a/src/amf/amfd/imm.cc
+++ b/src/amf/amfd/imm.cc
@@ -461,6 +461,34 @@ AvdJobDequeueResultT Fifo::executeAll(const AVD_CL_CB *cb,
AvdJobTypeT job_type)
return ret;
}
+void Fifo::remove(const AVD_CL_CB *cb, AvdJobTypeT job_type) {
+
+ Job *ajob, *firstjob;
+
+ TRACE_ENTER();
+ firstjob = nullptr;
+
+ while ((ajob = peek()) != nullptr) {
+ if (ajob->getJobType() == job_type) {
+ delete Fifo::dequeue();
+ } else {
+ // push back
+ ajob = Fifo::dequeue();
+ Fifo::queue(ajob);
+
+ // check if we have gone through all jobs of queue
+ if (firstjob == nullptr) {
+ firstjob = ajob;
+ } else {
+ if (firstjob == ajob)
+ break;
+ }
+ }
+ }
+
+ TRACE_LEAVE();
+}
+
AvdJobDequeueResultT Fifo::executeAdminResp(const AVD_CL_CB *cb) {
Job *ajob;
AvdJobDequeueResultT ret = JOB_EXECUTED;
diff --git a/src/amf/amfd/imm.h b/src/amf/amfd/imm.h
index f0152ac..83c5686 100644
--- a/src/amf/amfd/imm.h
+++ b/src/amf/amfd/imm.h
@@ -56,6 +56,7 @@ typedef enum {
typedef enum {
JOB_TYPE_IMM = 1, /* A IMM job */
JOB_TYPE_NTF = 2, /* A NTF job */
+ JOB_TYPE_CLM = 3, /* A CLM job */
JOB_TYPE_ANY
} AvdJobTypeT;
@@ -166,6 +167,8 @@ class Fifo {
static AvdJobDequeueResultT execute(const AVD_CL_CB *cb);
static AvdJobDequeueResultT executeAll(const AVD_CL_CB *cb,
AvdJobTypeT job_type = JOB_TYPE_ANY);
+ static void remove(const AVD_CL_CB *cb,
+ AvdJobTypeT job_type = JOB_TYPE_ANY);
static AvdJobDequeueResultT executeAdminResp(const AVD_CL_CB *cb);
static void empty();
diff --git a/src/amf/amfd/role.cc b/src/amf/amfd/role.cc
index ec13c3b..865d89d 100644
--- a/src/amf/amfd/role.cc
+++ b/src/amf/amfd/role.cc
@@ -1066,7 +1066,8 @@ uint32_t amfd_switch_actv_qsd(AVD_CL_CB *cb) {
/* Mark AVD as Quiesced. */
cb->avail_state_avd = SA_AMF_HA_QUIESCED;
- avd_clm_track_stop();
+ if (avd_clm_track_stop(cb) == SA_AIS_ERR_TRY_AGAIN)
+ Fifo::queue(new ClmTrackStop());
/* Go ahead and set mds role as already the NCS SU has been switched */
if (NCSCC_RC_SUCCESS !=
@@ -1105,7 +1106,6 @@ uint32_t amfd_switch_actv_qsd(AVD_CL_CB *cb) {
uint32_t amfd_switch_qsd_stdby(AVD_CL_CB *cb) {
uint32_t status = NCSCC_RC_SUCCESS;
- SaAisErrorT ais_rc;
TRACE_ENTER();
LOG_NO("Switching Quiesced --> StandBy");
@@ -1138,12 +1138,8 @@ uint32_t amfd_switch_qsd_stdby(AVD_CL_CB *cb) {
avd_pg_node_csi_del_all(cb, avnd);
}
- if (cb->is_clm_track_started == true) {
- ais_rc = avd_clm_track_stop();
- if (ais_rc != SA_AIS_OK && ais_rc != SA_AIS_ERR_NOT_EXIST) {
- LOG_ER("Failed to stop cluster tracking after switch over");
- }
- }
+ if (avd_clm_track_stop(cb) == SA_AIS_ERR_TRY_AGAIN)
+ Fifo::queue(new ClmTrackStop());
LOG_NO("Controller switch over done");
saflog(LOG_NOTICE, amfSvcUsrName, "Controller switch over done at %x",
@@ -1274,9 +1270,9 @@ uint32_t amfd_switch_stdby_actv(AVD_CL_CB *cb) {
if (NCSCC_RC_SUCCESS != avd_rde_set_role(SA_AMF_HA_ACTIVE)) {
LOG_ER("rde role change failed from stdy -> Active");
}
-
- if (avd_clm_track_start() != SA_AIS_OK) {
+ if (avd_clm_track_start(cb) == SA_AIS_ERR_TRY_AGAIN) {
LOG_ER("Switch Standby --> Active, clm track start failed");
+ Fifo::queue(new ClmTrackStart());
avd_d2d_chg_role_rsp(cb, NCSCC_RC_FAILURE, SA_AMF_HA_ACTIVE);
return NCSCC_RC_FAILURE;
}
diff --git a/src/amf/amfd/sg_2n_fsm.cc b/src/amf/amfd/sg_2n_fsm.cc
index 3a7609e..c7d5844 100644
--- a/src/amf/amfd/sg_2n_fsm.cc
+++ b/src/amf/amfd/sg_2n_fsm.cc
@@ -1767,7 +1767,8 @@ uint32_t SG_2N::susi_success_sg_realign(AVD_SU *su,
AVD_SU_SI_REL *susi,
if ((state == SA_AMF_HA_ACTIVE) &&
(cb->node_id_avd == su->su_on_node->node_info.nodeId)) {
/* This is as a result of failover, start CLM tracking*/
- (void)avd_clm_track_start();
+ if (avd_clm_track_start(cb) == SA_AIS_ERR_TRY_AGAIN)
+ Fifo::queue(new ClmTrackStart());
}
// Set active_services_exist at error conditions e.g. controller
fail-over
diff --git a/src/amf/amfd/sgproc.cc b/src/amf/amfd/sgproc.cc
index d305b22..610c205 100644
--- a/src/amf/amfd/sgproc.cc
+++ b/src/amf/amfd/sgproc.cc
@@ -2201,7 +2201,8 @@ void avd_node_down_mw_susi_failover(AVD_CL_CB *cb,
AVD_AVND *avnd) {
in avd_sg_2n_susi_sucss_sg_reln, so start here.*/
if ((i_su->sg_of_su->sg_redundancy_model == SA_AMF_2N_REDUNDANCY_MODEL) &&
(i_su->sg_of_su->sg_fsm_state == AVD_SG_FSM_STABLE))
- (void)avd_clm_track_start();
+ if (avd_clm_track_start(cb) == SA_AIS_ERR_TRY_AGAIN)
+ Fifo::queue(new ClmTrackStart());
/* Free all the SU SI assignments*/
i_su->delete_all_susis();
--
2.7.4
------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel