Node failover state is currently not included in the cold sync data. This means that if a delayed failover is in progress *before* a standby controller becomes available, *and* a controller failover then occurs, the information about the delayed failover is lost. Fix this by including each node's failover state in the cold sync node data for peers at MBCSV sub part version 10 or later, and by adding a failover_updt async update counter that is compared in the warm sync response. --- src/amf/amfd/chkop.cc | 4 ++ src/amf/amfd/ckpt.h | 4 +- src/amf/amfd/ckpt_dec.cc | 77 ++++++++++++++++++++++++++++---------- src/amf/amfd/ckpt_edu.cc | 2 + src/amf/amfd/ckpt_enc.cc | 5 ++- src/amf/amfd/node.h | 3 ++ src/amf/amfd/node_state_machine.cc | 2 + src/amf/amfd/util.cc | 1 + 8 files changed, 76 insertions(+), 22 deletions(-)
diff --git a/src/amf/amfd/chkop.cc b/src/amf/amfd/chkop.cc index e9a68f4..56b0142 100644 --- a/src/amf/amfd/chkop.cc +++ b/src/amf/amfd/chkop.cc @@ -1051,6 +1051,10 @@ uint32_t avsv_send_ckpt_data(AVD_CL_CB *cb, uint32_t action, avd_cb->avd_peer_ver); return NCSCC_RC_SUCCESS; } + if (avd_cb->avd_peer_ver >= AVD_MBCSV_SUB_PART_VERSION_10) { + cb->async_updt_cnt.failover_updt++; + } + break; default: return NCSCC_RC_SUCCESS; diff --git a/src/amf/amfd/ckpt.h b/src/amf/amfd/ckpt.h index 875776a..2e15387 100644 --- a/src/amf/amfd/ckpt.h +++ b/src/amf/amfd/ckpt.h @@ -35,9 +35,10 @@ #define AMF_AMFD_CKPT_H_ // current version -#define AVD_MBCSV_SUB_PART_VERSION 9 +#define AVD_MBCSV_SUB_PART_VERSION 10 // supported versions +#define AVD_MBCSV_SUB_PART_VERSION_10 10 #define AVD_MBCSV_SUB_PART_VERSION_9 9 #define AVD_MBCSV_SUB_PART_VERSION_8 8 #define AVD_MBCSV_SUB_PART_VERSION_7 7 @@ -109,6 +110,7 @@ typedef struct avsv_async_updt_cnt { uint32_t compcstype_updt; uint32_t si_trans_updt; uint32_t ng_updt; + uint32_t failover_updt; } AVSV_ASYNC_UPDT_CNT; /* diff --git a/src/amf/amfd/ckpt_dec.cc b/src/amf/amfd/ckpt_dec.cc index a46f6d3..6288b4f 100644 --- a/src/amf/amfd/ckpt_dec.cc +++ b/src/amf/amfd/ckpt_dec.cc @@ -178,6 +178,31 @@ const AVSV_DECODE_COLD_SYNC_RSP_DATA_FUNC_PTR dec_cs_data_func_list[] = { dec_cs_comp_config, dec_cs_comp_cs_type_config, dec_cs_siass, dec_cs_si_trans, dec_cs_async_updt_cnt}; +void set_node_failover_state(AVD_CL_CB *cb, const SaClmNodeIdT node_id, + const uint32_t state) { + TRACE_ENTER(); + + if (state == NodeState::NodeStates::kUndefined) { + // not in failover list + return; + } + + auto failed_node = cb->failover_list.find(node_id); + if (failed_node != cb->failover_list.end()) { + failed_node->second->SetState(state); + } else { + LOG_NO("Node '%u' not found in failover_list. Create new entry", + node_id); + auto new_node = std::make_shared<NodeStateMachine>(cb, node_id); + // node must be added to failover_list before SetState() is called. 
+ // If the state is 'end', then it will be deleted by SetState(). + // Otherwise, we will leave a node in 'End' state mistakenly in + // failover_list. + cb->failover_list[node_id] = new_node; + new_node->SetState(state); + } +} + void decode_cb(NCS_UBAID *ub, AVD_CL_CB *cb, const uint16_t peer_version) { osaf_decode_uint32(ub, reinterpret_cast<uint32_t *>(&cb->init_state)); osaf_decode_satimet(ub, &cb->cluster_init_time); @@ -254,6 +279,9 @@ void decode_node_config(NCS_UBAID *ub, AVD_AVND *avnd, osaf_decode_uint32(ub, &avnd->rcv_msg_id); osaf_decode_uint32(ub, &avnd->snd_msg_id); osaf_extended_name_free(&node_name); + if (peer_version >= AVD_MBCSV_SUB_PART_VERSION_10) { + osaf_decode_uint32(ub, &avnd->failover_state); + } TRACE_LEAVE(); } @@ -585,7 +613,7 @@ void decode_siass(NCS_UBAID *ub, AVSV_SU_SI_REL_CKPT_MSG *su_si_ckpt, su_si_ckpt->csi_add_rem = static_cast<SaBoolT>(csi_add_rem); osaf_decode_sanamet(ub, &su_si_ckpt->comp_name); osaf_decode_sanamet(ub, &su_si_ckpt->csi_name); - }; + } } /****************************************************************************\ @@ -2199,6 +2227,7 @@ static uint32_t dec_cs_node_config(AVD_CL_CB *cb, NCS_MBCSV_CB_DEC *dec, for (count = 0; count < num_of_obj; count++) { decode_node_config(&dec->i_uba, &avnd, dec->i_peer_version); status = avd_ckpt_node(cb, &avnd, dec->i_action); + set_node_failover_state(cb, avnd.node_info.nodeId, avnd.failover_state); osafassert(status == NCSCC_RC_SUCCESS); } @@ -2552,14 +2581,23 @@ static uint32_t dec_cs_async_updt_cnt(AVD_CL_CB *cb, NCS_MBCSV_CB_DEC *dec, /* * Decode and send async update counts for all the data structures. 
*/ - if (dec->i_peer_version >= AVD_MBCSV_SUB_PART_VERSION_7) { + if (dec->i_peer_version >= AVD_MBCSV_SUB_PART_VERSION_10) { TRACE( - "Peer AMFD version is >= AVD_MBCSV_SUB_PART_VERSION_7," + "Peer AMFD version is >= AVD_MBCSV_SUB_PART_VERSION_10," "peer ver:%d", avd_cb->avd_peer_ver); status = m_NCS_EDU_VER_EXEC(&cb->edu_hdl, avsv_edp_ckpt_msg_async_updt_cnt, &dec->i_uba, EDP_OP_TYPE_DEC, &updt_cnt, &ederror, dec->i_peer_version); + } else if (dec->i_peer_version >= AVD_MBCSV_SUB_PART_VERSION_7) { + TRACE( + "Peer AMFD version is >= AVD_MBCSV_SUB_PART_VERSION_7," + "peer ver:%d", + avd_cb->avd_peer_ver); + status = m_NCS_EDU_SEL_VER_EXEC( + &cb->edu_hdl, avsv_edp_ckpt_msg_async_updt_cnt, &dec->i_uba, + EDP_OP_TYPE_DEC, &updt_cnt, &ederror, dec->i_peer_version, 14, 1, 2, 3, + 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14); } else { TRACE( "Peer AMFD version is <AVD_MBCSV_SUB_PART_VERSION_7," @@ -2607,15 +2645,21 @@ uint32_t avd_dec_warm_sync_rsp(AVD_CL_CB *cb, NCS_MBCSV_CB_DEC *dec) { * Decode latest async update counts. (In the same manner we received * in the last message of the cold sync response. 
*/ - if (dec->i_peer_version >= AVD_MBCSV_SUB_PART_VERSION_7) + if (dec->i_peer_version >= AVD_MBCSV_SUB_PART_VERSION_10) { status = m_NCS_EDU_VER_EXEC(&cb->edu_hdl, avsv_edp_ckpt_msg_async_updt_cnt, &dec->i_uba, EDP_OP_TYPE_DEC, &updt_cnt, &ederror, dec->i_peer_version); - else + } else if (dec->i_peer_version >= AVD_MBCSV_SUB_PART_VERSION_7) { + status = m_NCS_EDU_SEL_VER_EXEC( + &cb->edu_hdl, avsv_edp_ckpt_msg_async_updt_cnt, &dec->i_uba, + EDP_OP_TYPE_DEC, &updt_cnt, &ederror, dec->i_peer_version, 14, 1, 2, 3, + 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14); + } else { status = m_NCS_EDU_SEL_VER_EXEC( &cb->edu_hdl, avsv_edp_ckpt_msg_async_updt_cnt, &dec->i_uba, EDP_OP_TYPE_DEC, &updt_cnt, &ederror, dec->i_peer_version, 13, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13); + } if (status != NCSCC_RC_SUCCESS) LOG_ER("%s: decode failed, ederror=%u", __FUNCTION__, ederror); @@ -2677,6 +2721,9 @@ uint32_t avd_dec_warm_sync_rsp(AVD_CL_CB *cb, NCS_MBCSV_CB_DEC *dec) { if (updt_cnt->ng_updt != cb->async_updt_cnt.ng_updt) LOG_ER("ng_updt counters mismatch: Active: %u Standby: %u", updt_cnt->ng_updt, cb->async_updt_cnt.ng_updt); + if (updt_cnt->failover_updt != cb->async_updt_cnt.failover_updt) + LOG_ER("failover_updt counters mismatch: Active: %u Standby: %u", + updt_cnt->failover_updt, cb->async_updt_cnt.failover_updt); LOG_ER("Out of sync detected in warm sync response, exiting"); osafassert(0); @@ -2982,21 +3029,11 @@ static uint32_t dec_node_failover_state(AVD_CL_CB *cb, NCS_MBCSV_CB_DEC *dec) { osaf_decode_uint32(&dec->i_uba, reinterpret_cast<uint32_t *>(&state)); - auto failed_node = cb->failover_list.find(node->node_info.nodeId); - if (failed_node != cb->failover_list.end()) { - failed_node->second->SetState(state); - } else { - LOG_NO("Node '%s' not found in failover_list. Create new entry", - node->node_name.c_str()); - auto new_node = std::make_shared<NodeStateMachine>(cb, - node->node_info.nodeId); - // node must be added to failover_list before SetState() is called. 
- // If the state is 'end', then it will be deleted by SetState(). - // Otherwise, we will leave a node in 'End' state mistakenly in - // failover_list. - cb->failover_list[node->node_info.nodeId] = new_node; - new_node->SetState(state); + set_node_failover_state(cb, node->node_info.nodeId, state); + + if (dec->i_peer_version >= AVD_MBCSV_SUB_PART_VERSION_10) { + cb->async_updt_cnt.failover_updt++; } return NCSCC_RC_SUCCESS; -} \ No newline at end of file +} diff --git a/src/amf/amfd/ckpt_edu.cc b/src/amf/amfd/ckpt_edu.cc index 5564c66..570d8bd 100644 --- a/src/amf/amfd/ckpt_edu.cc +++ b/src/amf/amfd/ckpt_edu.cc @@ -119,6 +119,8 @@ uint32_t avsv_edp_ckpt_msg_async_updt_cnt(EDU_HDL *hdl, EDU_TKN *edu_tkn, (long)&((AVSV_ASYNC_UPDT_CNT *)0)->si_trans_updt, 0, nullptr}, {EDU_EXEC, ncs_edp_uns32, 0, 0, 0, (long)&((AVSV_ASYNC_UPDT_CNT *)0)->ng_updt, 0, nullptr}, + {EDU_EXEC, ncs_edp_uns32, 0, 0, 0, + (long)&((AVSV_ASYNC_UPDT_CNT *)0)->failover_updt, 0, nullptr}, {EDU_END, 0, 0, 0, 0, 0, 0, nullptr}, }; diff --git a/src/amf/amfd/ckpt_enc.cc b/src/amf/amfd/ckpt_enc.cc index 0e675ae..2ca98f8 100644 --- a/src/amf/amfd/ckpt_enc.cc +++ b/src/amf/amfd/ckpt_enc.cc @@ -265,6 +265,9 @@ void encode_node_config(NCS_UBAID *ub, const AVD_AVND *avnd, osaf_encode_uint32(ub, AVSV_AVND_CARD_SYS_CON); osaf_encode_uint32(ub, avnd->rcv_msg_id); osaf_encode_uint32(ub, avnd->snd_msg_id); + if (peer_version >= AVD_MBCSV_SUB_PART_VERSION_10) { + osaf_encode_uint32(ub, avnd->failover_state); + } } /****************************************************************************\ @@ -616,7 +619,7 @@ void encode_siass(NCS_UBAID *ub, const AVD_SU_SI_REL *susi, osaf_encode_bool(ub, static_cast<bool>(susi->csi_add_rem)); osaf_encode_sanamet_o2(ub, susi->comp_name.c_str()); osaf_encode_sanamet_o2(ub, susi->csi_name.c_str()); - }; + } } /****************************************************************************\ diff --git a/src/amf/amfd/node.h b/src/amf/amfd/node.h index dbe48dc..097f54b 100644 --- 
a/src/amf/amfd/node.h +++ b/src/amf/amfd/node.h @@ -115,6 +115,9 @@ class AVD_AVND { * Checkpointing - Sent independent update */ + // used for cold sync only + uint32_t failover_state{NodeState::NodeStates::kUndefined}; + std::vector<AVD_SU *> list_of_ncs_su; /* the list of NCS service units on * this node. */ diff --git a/src/amf/amfd/node_state_machine.cc b/src/amf/amfd/node_state_machine.cc index 4653f79..efe2085 100644 --- a/src/amf/amfd/node_state_machine.cc +++ b/src/amf/amfd/node_state_machine.cc @@ -43,6 +43,8 @@ void NodeStateMachine::SetState(std::shared_ptr<NodeState> state) { AVD_AVND *node = avd_node_find_nodeid(node_id_); osafassert(node != nullptr); m_AVSV_SEND_CKPT_UPDT_ASYNC_UPDT(cb_, node, AVSV_CKPT_NODE_FAILOVER_STATE); + // used for cold sync + node->failover_state = state_->GetInt(); if (state->GetInt() == NodeState::kEnd) { cb_->failover_list.erase(node_id_); diff --git a/src/amf/amfd/util.cc b/src/amf/amfd/util.cc index 0dc3e99..9daa10a 100644 --- a/src/amf/amfd/util.cc +++ b/src/amf/amfd/util.cc @@ -1359,6 +1359,7 @@ int amfd_file_dump(const char *filename) { fprintf(f, " compcstype_updt:%d\n", avd_cb->async_updt_cnt.compcstype_updt); fprintf(f, " si_trans_updt:%d\n", avd_cb->async_updt_cnt.si_trans_updt); fprintf(f, " ng_updt:%d\n", avd_cb->async_updt_cnt.ng_updt); + fprintf(f, " failover_updt:%d\n", avd_cb->async_updt_cnt.failover_updt); fprintf(f, "nodes:\n"); for (const auto &value : *node_id_db) { -- 2.7.4 _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel