Failover information is not currently included in cold sync. This means
that if a delayed failover is in progress *before* a standby controller
is available, *and* a controller failover then occurs, the information
about the delayed failover is lost.
---
src/amf/amfd/chkop.cc | 4 ++
src/amf/amfd/ckpt.h | 4 +-
src/amf/amfd/ckpt_dec.cc | 77 ++++++++++++++++++++++++++++----------
src/amf/amfd/ckpt_edu.cc | 2 +
src/amf/amfd/ckpt_enc.cc | 5 ++-
src/amf/amfd/node.h | 3 ++
src/amf/amfd/node_state_machine.cc | 2 +
src/amf/amfd/util.cc | 1 +
8 files changed, 76 insertions(+), 22 deletions(-)
diff --git a/src/amf/amfd/chkop.cc b/src/amf/amfd/chkop.cc
index e9a68f4..56b0142 100644
--- a/src/amf/amfd/chkop.cc
+++ b/src/amf/amfd/chkop.cc
@@ -1051,6 +1051,10 @@ uint32_t avsv_send_ckpt_data(AVD_CL_CB *cb, uint32_t
action,
avd_cb->avd_peer_ver);
return NCSCC_RC_SUCCESS;
}
+ if (avd_cb->avd_peer_ver >= AVD_MBCSV_SUB_PART_VERSION_10) {
+ cb->async_updt_cnt.failover_updt++;
+ }
+
break;
default:
return NCSCC_RC_SUCCESS;
diff --git a/src/amf/amfd/ckpt.h b/src/amf/amfd/ckpt.h
index 875776a..2e15387 100644
--- a/src/amf/amfd/ckpt.h
+++ b/src/amf/amfd/ckpt.h
@@ -35,9 +35,10 @@
#define AMF_AMFD_CKPT_H_
// current version
-#define AVD_MBCSV_SUB_PART_VERSION 9
+#define AVD_MBCSV_SUB_PART_VERSION 10
// supported versions
+#define AVD_MBCSV_SUB_PART_VERSION_10 10
#define AVD_MBCSV_SUB_PART_VERSION_9 9
#define AVD_MBCSV_SUB_PART_VERSION_8 8
#define AVD_MBCSV_SUB_PART_VERSION_7 7
@@ -109,6 +110,7 @@ typedef struct avsv_async_updt_cnt {
uint32_t compcstype_updt;
uint32_t si_trans_updt;
uint32_t ng_updt;
+ uint32_t failover_updt;
} AVSV_ASYNC_UPDT_CNT;
/*
diff --git a/src/amf/amfd/ckpt_dec.cc b/src/amf/amfd/ckpt_dec.cc
index a46f6d3..6288b4f 100644
--- a/src/amf/amfd/ckpt_dec.cc
+++ b/src/amf/amfd/ckpt_dec.cc
@@ -178,6 +178,31 @@ const AVSV_DECODE_COLD_SYNC_RSP_DATA_FUNC_PTR
dec_cs_data_func_list[] = {
dec_cs_comp_config, dec_cs_comp_cs_type_config, dec_cs_siass,
dec_cs_si_trans, dec_cs_async_updt_cnt};
+void set_node_failover_state(AVD_CL_CB *cb, const SaClmNodeIdT node_id,
+ const uint32_t state) {
+ TRACE_ENTER();
+
+ if (state == NodeState::NodeStates::kUndefined) {
+ // kUndefined: the node is not in failover_list, so nothing to apply
+ return;
+ }
+
+ auto failed_node = cb->failover_list.find(node_id);
+ if (failed_node != cb->failover_list.end()) {
+ failed_node->second->SetState(state);
+ } else {
+ LOG_NO("Node '%u' not found in failover_list. Create new entry",
+ node_id);
+ auto new_node = std::make_shared<NodeStateMachine>(cb, node_id);
+ // The entry must be in failover_list before SetState() is called:
+ // when the state is 'End', SetState() erases the node from the list.
+ // Inserting after SetState() would instead leave a node that is
+ // already in 'End' state in failover_list by mistake.
+ cb->failover_list[node_id] = new_node;
+ new_node->SetState(state);
+ }
+}
+
void decode_cb(NCS_UBAID *ub, AVD_CL_CB *cb, const uint16_t peer_version) {
osaf_decode_uint32(ub, reinterpret_cast<uint32_t *>(&cb->init_state));
osaf_decode_satimet(ub, &cb->cluster_init_time);
@@ -254,6 +279,9 @@ void decode_node_config(NCS_UBAID *ub, AVD_AVND *avnd,
osaf_decode_uint32(ub, &avnd->rcv_msg_id);
osaf_decode_uint32(ub, &avnd->snd_msg_id);
osaf_extended_name_free(&node_name);
+ if (peer_version >= AVD_MBCSV_SUB_PART_VERSION_10) {
+ osaf_decode_uint32(ub, &avnd->failover_state);
+ }
TRACE_LEAVE();
}
@@ -585,7 +613,7 @@ void decode_siass(NCS_UBAID *ub, AVSV_SU_SI_REL_CKPT_MSG *su_si_ckpt,
su_si_ckpt->csi_add_rem = static_cast<SaBoolT>(csi_add_rem);
osaf_decode_sanamet(ub, &su_si_ckpt->comp_name);
osaf_decode_sanamet(ub, &su_si_ckpt->csi_name);
- };
+ }
}
/****************************************************************************\
@@ -2199,6 +2227,7 @@ static uint32_t dec_cs_node_config(AVD_CL_CB *cb,
NCS_MBCSV_CB_DEC *dec,
for (count = 0; count < num_of_obj; count++) {
decode_node_config(&dec->i_uba, &avnd, dec->i_peer_version);
status = avd_ckpt_node(cb, &avnd, dec->i_action);
+ set_node_failover_state(cb, avnd.node_info.nodeId, avnd.failover_state);
osafassert(status == NCSCC_RC_SUCCESS);
}
@@ -2552,14 +2581,23 @@ static uint32_t dec_cs_async_updt_cnt(AVD_CL_CB *cb, NCS_MBCSV_CB_DEC *dec,
/*
* Decode and send async update counts for all the data structures.
*/
- if (dec->i_peer_version >= AVD_MBCSV_SUB_PART_VERSION_7) {
+ if (dec->i_peer_version >= AVD_MBCSV_SUB_PART_VERSION_10) {
TRACE(
- "Peer AMFD version is >= AVD_MBCSV_SUB_PART_VERSION_7,"
+ "Peer AMFD version is >= AVD_MBCSV_SUB_PART_VERSION_10,"
"peer ver:%d",
avd_cb->avd_peer_ver);
status = m_NCS_EDU_VER_EXEC(&cb->edu_hdl,
avsv_edp_ckpt_msg_async_updt_cnt,
&dec->i_uba, EDP_OP_TYPE_DEC, &updt_cnt,
&ederror, dec->i_peer_version);
+ } else if (dec->i_peer_version >= AVD_MBCSV_SUB_PART_VERSION_7) {
+ TRACE(
+ "Peer AMFD version is >= AVD_MBCSV_SUB_PART_VERSION_7,"
+ "peer ver:%d",
+ avd_cb->avd_peer_ver);
+ status = m_NCS_EDU_SEL_VER_EXEC(
+ &cb->edu_hdl, avsv_edp_ckpt_msg_async_updt_cnt, &dec->i_uba,
+ EDP_OP_TYPE_DEC, &updt_cnt, &ederror, dec->i_peer_version, 14, 1, 2, 3,
+ 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14);
} else {
TRACE(
"Peer AMFD version is <AVD_MBCSV_SUB_PART_VERSION_7,"
@@ -2607,15 +2645,21 @@ uint32_t avd_dec_warm_sync_rsp(AVD_CL_CB *cb,
NCS_MBCSV_CB_DEC *dec) {
* Decode latest async update counts. (In the same manner we received
* in the last message of the cold sync response.
*/
- if (dec->i_peer_version >= AVD_MBCSV_SUB_PART_VERSION_7)
+ if (dec->i_peer_version >= AVD_MBCSV_SUB_PART_VERSION_10) {
status = m_NCS_EDU_VER_EXEC(&cb->edu_hdl,
avsv_edp_ckpt_msg_async_updt_cnt,
&dec->i_uba, EDP_OP_TYPE_DEC, &updt_cnt,
&ederror, dec->i_peer_version);
- else
+ } else if (dec->i_peer_version >= AVD_MBCSV_SUB_PART_VERSION_7) {
+ status = m_NCS_EDU_SEL_VER_EXEC(
+ &cb->edu_hdl, avsv_edp_ckpt_msg_async_updt_cnt, &dec->i_uba,
+ EDP_OP_TYPE_DEC, &updt_cnt, &ederror, dec->i_peer_version, 14, 1, 2, 3,
+ 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14);
+ } else {
status = m_NCS_EDU_SEL_VER_EXEC(
&cb->edu_hdl, avsv_edp_ckpt_msg_async_updt_cnt, &dec->i_uba,
EDP_OP_TYPE_DEC, &updt_cnt, &ederror, dec->i_peer_version, 13, 1, 2,
3,
4, 5, 6, 7, 8, 9, 10, 11, 12, 13);
+ }
if (status != NCSCC_RC_SUCCESS)
LOG_ER("%s: decode failed, ederror=%u", __FUNCTION__, ederror);
@@ -2677,6 +2721,9 @@ uint32_t avd_dec_warm_sync_rsp(AVD_CL_CB *cb,
NCS_MBCSV_CB_DEC *dec) {
if (updt_cnt->ng_updt != cb->async_updt_cnt.ng_updt)
LOG_ER("ng_updt counters mismatch: Active: %u Standby: %u",
updt_cnt->ng_updt, cb->async_updt_cnt.ng_updt);
+ if (updt_cnt->failover_updt != cb->async_updt_cnt.failover_updt)
+ LOG_ER("failover_updt counters mismatch: Active: %u Standby: %u",
+ updt_cnt->failover_updt, cb->async_updt_cnt.failover_updt);
LOG_ER("Out of sync detected in warm sync response, exiting");
osafassert(0);
@@ -2982,21 +3029,11 @@ static uint32_t dec_node_failover_state(AVD_CL_CB *cb,
NCS_MBCSV_CB_DEC *dec) {
osaf_decode_uint32(&dec->i_uba,
reinterpret_cast<uint32_t *>(&state));
- auto failed_node = cb->failover_list.find(node->node_info.nodeId);
- if (failed_node != cb->failover_list.end()) {
- failed_node->second->SetState(state);
- } else {
- LOG_NO("Node '%s' not found in failover_list. Create new entry",
- node->node_name.c_str());
- auto new_node = std::make_shared<NodeStateMachine>(cb,
- node->node_info.nodeId);
- // node must be added to failover_list before SetState() is called.
- // If the state is 'end', then it will be deleted by SetState().
- // Otherwise, we will leave a node in 'End' state mistakenly in
- // failover_list.
- cb->failover_list[node->node_info.nodeId] = new_node;
- new_node->SetState(state);
+ set_node_failover_state(cb, node->node_info.nodeId, state);
+
+ if (dec->i_peer_version >= AVD_MBCSV_SUB_PART_VERSION_10) {
+ cb->async_updt_cnt.failover_updt++;
}
return NCSCC_RC_SUCCESS;
-}
\ No newline at end of file
+}
diff --git a/src/amf/amfd/ckpt_edu.cc b/src/amf/amfd/ckpt_edu.cc
index 5564c66..570d8bd 100644
--- a/src/amf/amfd/ckpt_edu.cc
+++ b/src/amf/amfd/ckpt_edu.cc
@@ -119,6 +119,8 @@ uint32_t avsv_edp_ckpt_msg_async_updt_cnt(EDU_HDL *hdl,
EDU_TKN *edu_tkn,
(long)&((AVSV_ASYNC_UPDT_CNT *)0)->si_trans_updt, 0, nullptr},
{EDU_EXEC, ncs_edp_uns32, 0, 0, 0,
(long)&((AVSV_ASYNC_UPDT_CNT *)0)->ng_updt, 0, nullptr},
+ {EDU_EXEC, ncs_edp_uns32, 0, 0, 0,
+ (long)&((AVSV_ASYNC_UPDT_CNT *)0)->failover_updt, 0, nullptr},
{EDU_END, 0, 0, 0, 0, 0, 0, nullptr},
};
diff --git a/src/amf/amfd/ckpt_enc.cc b/src/amf/amfd/ckpt_enc.cc
index 0e675ae..2ca98f8 100644
--- a/src/amf/amfd/ckpt_enc.cc
+++ b/src/amf/amfd/ckpt_enc.cc
@@ -265,6 +265,9 @@ void encode_node_config(NCS_UBAID *ub, const AVD_AVND *avnd,
osaf_encode_uint32(ub, AVSV_AVND_CARD_SYS_CON);
osaf_encode_uint32(ub, avnd->rcv_msg_id);
osaf_encode_uint32(ub, avnd->snd_msg_id);
+ if (peer_version >= AVD_MBCSV_SUB_PART_VERSION_10) {
+ osaf_encode_uint32(ub, avnd->failover_state);
+ }
}
/****************************************************************************\
@@ -616,7 +619,7 @@ void encode_siass(NCS_UBAID *ub, const AVD_SU_SI_REL *susi,
osaf_encode_bool(ub, static_cast<bool>(susi->csi_add_rem));
osaf_encode_sanamet_o2(ub, susi->comp_name.c_str());
osaf_encode_sanamet_o2(ub, susi->csi_name.c_str());
- };
+ }
}
/****************************************************************************\
diff --git a/src/amf/amfd/node.h b/src/amf/amfd/node.h
index dbe48dc..097f54b 100644
--- a/src/amf/amfd/node.h
+++ b/src/amf/amfd/node.h
@@ -115,6 +115,9 @@ class AVD_AVND {
* Checkpointing - Sent independent update
*/
+ // used for cold sync only
+ uint32_t failover_state{NodeState::NodeStates::kUndefined};
+
std::vector<AVD_SU *> list_of_ncs_su; /* the list of NCS service units on
* this node.
*/
diff --git a/src/amf/amfd/node_state_machine.cc
b/src/amf/amfd/node_state_machine.cc
index 4653f79..efe2085 100644
--- a/src/amf/amfd/node_state_machine.cc
+++ b/src/amf/amfd/node_state_machine.cc
@@ -43,6 +43,8 @@ void NodeStateMachine::SetState(std::shared_ptr<NodeState>
state) {
AVD_AVND *node = avd_node_find_nodeid(node_id_);
osafassert(node != nullptr);
m_AVSV_SEND_CKPT_UPDT_ASYNC_UPDT(cb_, node, AVSV_CKPT_NODE_FAILOVER_STATE);
+ // used for cold sync
+ node->failover_state = state_->GetInt();
if (state->GetInt() == NodeState::kEnd) {
cb_->failover_list.erase(node_id_);
diff --git a/src/amf/amfd/util.cc b/src/amf/amfd/util.cc
index 0dc3e99..9daa10a 100644
--- a/src/amf/amfd/util.cc
+++ b/src/amf/amfd/util.cc
@@ -1359,6 +1359,7 @@ int amfd_file_dump(const char *filename) {
fprintf(f, " compcstype_updt:%d\n",
avd_cb->async_updt_cnt.compcstype_updt);
fprintf(f, " si_trans_updt:%d\n", avd_cb->async_updt_cnt.si_trans_updt);
fprintf(f, " ng_updt:%d\n", avd_cb->async_updt_cnt.ng_updt);
+ fprintf(f, " failover_updt:%d\n", avd_cb->async_updt_cnt.failover_updt);
fprintf(f, "nodes:\n");
for (const auto &value : *node_id_db) {