Hi,

ack (code review only)

Thanks

Minh

On 19/7/19 4:47 pm, Gary Lee wrote:
Failover information is not currently included in cold sync. This means
that if a delayed failover is in progress *before* a standby controller
is available, *and* a controller failover then occurs, the information
about the delayed failover is lost.
---
  src/amf/amfd/chkop.cc              |  4 ++
  src/amf/amfd/ckpt.h                |  4 +-
  src/amf/amfd/ckpt_dec.cc           | 77 ++++++++++++++++++++++++++++----------
  src/amf/amfd/ckpt_edu.cc           |  2 +
  src/amf/amfd/ckpt_enc.cc           |  5 ++-
  src/amf/amfd/node.h                |  3 ++
  src/amf/amfd/node_state_machine.cc |  2 +
  src/amf/amfd/util.cc               |  1 +
  8 files changed, 76 insertions(+), 22 deletions(-)

diff --git a/src/amf/amfd/chkop.cc b/src/amf/amfd/chkop.cc
index e9a68f4..56b0142 100644
--- a/src/amf/amfd/chkop.cc
+++ b/src/amf/amfd/chkop.cc
@@ -1051,6 +1051,10 @@ uint32_t avsv_send_ckpt_data(AVD_CL_CB *cb, uint32_t action,
              avd_cb->avd_peer_ver);
          return NCSCC_RC_SUCCESS;
        }
+      if (avd_cb->avd_peer_ver >= AVD_MBCSV_SUB_PART_VERSION_10) {
+        cb->async_updt_cnt.failover_updt++;
+      }
+
        break;
      default:
        return NCSCC_RC_SUCCESS;
diff --git a/src/amf/amfd/ckpt.h b/src/amf/amfd/ckpt.h
index 875776a..2e15387 100644
--- a/src/amf/amfd/ckpt.h
+++ b/src/amf/amfd/ckpt.h
@@ -35,9 +35,10 @@
  #define AMF_AMFD_CKPT_H_
// current version
-#define AVD_MBCSV_SUB_PART_VERSION 9
+#define AVD_MBCSV_SUB_PART_VERSION 10
// supported versions
+#define AVD_MBCSV_SUB_PART_VERSION_10 10
  #define AVD_MBCSV_SUB_PART_VERSION_9 9
  #define AVD_MBCSV_SUB_PART_VERSION_8 8
  #define AVD_MBCSV_SUB_PART_VERSION_7 7
@@ -109,6 +110,7 @@ typedef struct avsv_async_updt_cnt {
    uint32_t compcstype_updt;
    uint32_t si_trans_updt;
    uint32_t ng_updt;
+  uint32_t failover_updt;
  } AVSV_ASYNC_UPDT_CNT;
/*
diff --git a/src/amf/amfd/ckpt_dec.cc b/src/amf/amfd/ckpt_dec.cc
index a46f6d3..6288b4f 100644
--- a/src/amf/amfd/ckpt_dec.cc
+++ b/src/amf/amfd/ckpt_dec.cc
@@ -178,6 +178,31 @@ const AVSV_DECODE_COLD_SYNC_RSP_DATA_FUNC_PTR dec_cs_data_func_list[] = {
      dec_cs_comp_config, dec_cs_comp_cs_type_config, dec_cs_siass,
      dec_cs_si_trans,    dec_cs_async_updt_cnt};
+void set_node_failover_state(AVD_CL_CB *cb, const SaClmNodeIdT node_id,
+        const uint32_t state) {  // apply a decoded failover state to node_id's failover_list entry
+  TRACE_ENTER();  // NOTE(review): no matching TRACE_LEAVE() on any path here - confirm intended
+
+  if (state == NodeState::NodeStates::kUndefined) {
+    // kUndefined: node is not in the failover list, so there is nothing to apply
+    return;
+  }
+
+  auto failed_node = cb->failover_list.find(node_id);
+  if (failed_node != cb->failover_list.end()) {
+    failed_node->second->SetState(state);
+  } else {
+    LOG_NO("Node '%u' not found in failover_list. Create new entry",
+            node_id);
+    auto new_node = std::make_shared<NodeStateMachine>(cb, node_id);
+    // The node must be inserted into failover_list before SetState() is
+    // called: if the new state is 'End', SetState() erases the entry itself.
+    // Inserting after SetState() would instead leave a stale node in 'End'
+    // state in failover_list.
+    cb->failover_list[node_id] = new_node;
+    new_node->SetState(state);
+  }
+}
+
  void decode_cb(NCS_UBAID *ub, AVD_CL_CB *cb, const uint16_t peer_version) {
    osaf_decode_uint32(ub, reinterpret_cast<uint32_t *>(&cb->init_state));
    osaf_decode_satimet(ub, &cb->cluster_init_time);
@@ -254,6 +279,9 @@ void decode_node_config(NCS_UBAID *ub, AVD_AVND *avnd,
    osaf_decode_uint32(ub, &avnd->rcv_msg_id);
    osaf_decode_uint32(ub, &avnd->snd_msg_id);
    osaf_extended_name_free(&node_name);
+  if (peer_version >= AVD_MBCSV_SUB_PART_VERSION_10) {
+    osaf_decode_uint32(ub, &avnd->failover_state);
+  }
    TRACE_LEAVE();
  }
@@ -585,7 +613,7 @@ void decode_siass(NCS_UBAID *ub, AVSV_SU_SI_REL_CKPT_MSG *su_si_ckpt,
      su_si_ckpt->csi_add_rem = static_cast<SaBoolT>(csi_add_rem);
      osaf_decode_sanamet(ub, &su_si_ckpt->comp_name);
      osaf_decode_sanamet(ub, &su_si_ckpt->csi_name);
-  };
+  }
  }
/****************************************************************************\
@@ -2199,6 +2227,7 @@ static uint32_t dec_cs_node_config(AVD_CL_CB *cb, NCS_MBCSV_CB_DEC *dec,
    for (count = 0; count < num_of_obj; count++) {
      decode_node_config(&dec->i_uba, &avnd, dec->i_peer_version);
      status = avd_ckpt_node(cb, &avnd, dec->i_action);
+    set_node_failover_state(cb, avnd.node_info.nodeId, avnd.failover_state);
      osafassert(status == NCSCC_RC_SUCCESS);
    }
@@ -2552,14 +2581,23 @@ static uint32_t dec_cs_async_updt_cnt(AVD_CL_CB *cb, NCS_MBCSV_CB_DEC *dec,
    /*
     * Decode and send async update counts for all the data structures.
     */
-  if (dec->i_peer_version >= AVD_MBCSV_SUB_PART_VERSION_7) {
+  if (dec->i_peer_version >= AVD_MBCSV_SUB_PART_VERSION_10) {
      TRACE(
-        "Peer AMFD version is >= AVD_MBCSV_SUB_PART_VERSION_7,"
+        "Peer AMFD version is >= AVD_MBCSV_SUB_PART_VERSION_10,"
          "peer ver:%d",
          avd_cb->avd_peer_ver);
      status = m_NCS_EDU_VER_EXEC(&cb->edu_hdl, 
avsv_edp_ckpt_msg_async_updt_cnt,
                                  &dec->i_uba, EDP_OP_TYPE_DEC, &updt_cnt,
                                  &ederror, dec->i_peer_version);
+  } else if (dec->i_peer_version >= AVD_MBCSV_SUB_PART_VERSION_7) {
+    TRACE(
+        "Peer AMFD version is >= AVD_MBCSV_SUB_PART_VERSION_7,"
+        "peer ver:%d",
+        avd_cb->avd_peer_ver);
+    status = m_NCS_EDU_SEL_VER_EXEC(
+        &cb->edu_hdl, avsv_edp_ckpt_msg_async_updt_cnt, &dec->i_uba,
+        EDP_OP_TYPE_DEC, &updt_cnt, &ederror, dec->i_peer_version, 14, 1, 2, 3,
+        4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14);
    } else {
      TRACE(
          "Peer AMFD version is <AVD_MBCSV_SUB_PART_VERSION_7,"
@@ -2607,15 +2645,21 @@ uint32_t avd_dec_warm_sync_rsp(AVD_CL_CB *cb, NCS_MBCSV_CB_DEC *dec) {
     * Decode latest async update counts. (In the same manner we received
     * in the last message of the cold sync response.
     */
-  if (dec->i_peer_version >= AVD_MBCSV_SUB_PART_VERSION_7)
+  if (dec->i_peer_version >= AVD_MBCSV_SUB_PART_VERSION_10) {
      status = m_NCS_EDU_VER_EXEC(&cb->edu_hdl, 
avsv_edp_ckpt_msg_async_updt_cnt,
                                  &dec->i_uba, EDP_OP_TYPE_DEC, &updt_cnt,
                                  &ederror, dec->i_peer_version);
-  else
+  } else if (dec->i_peer_version >= AVD_MBCSV_SUB_PART_VERSION_7) {
+    status = m_NCS_EDU_SEL_VER_EXEC(
+        &cb->edu_hdl, avsv_edp_ckpt_msg_async_updt_cnt, &dec->i_uba,
+        EDP_OP_TYPE_DEC, &updt_cnt, &ederror, dec->i_peer_version, 14, 1, 2, 3,
+        4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14);
+  } else {
      status = m_NCS_EDU_SEL_VER_EXEC(
          &cb->edu_hdl, avsv_edp_ckpt_msg_async_updt_cnt, &dec->i_uba,
          EDP_OP_TYPE_DEC, &updt_cnt, &ederror, dec->i_peer_version, 13, 1, 2, 
3,
          4, 5, 6, 7, 8, 9, 10, 11, 12, 13);
+  }
if (status != NCSCC_RC_SUCCESS)
      LOG_ER("%s: decode failed, ederror=%u", __FUNCTION__, ederror);
@@ -2677,6 +2721,9 @@ uint32_t avd_dec_warm_sync_rsp(AVD_CL_CB *cb, NCS_MBCSV_CB_DEC *dec) {
      if (updt_cnt->ng_updt != cb->async_updt_cnt.ng_updt)
        LOG_ER("ng_updt counters mismatch: Active: %u Standby: %u",
               updt_cnt->ng_updt, cb->async_updt_cnt.ng_updt);
+    if (updt_cnt->failover_updt != cb->async_updt_cnt.failover_updt)
+      LOG_ER("failover_updt counters mismatch: Active: %u Standby: %u",
+             updt_cnt->failover_updt, cb->async_updt_cnt.failover_updt);
LOG_ER("Out of sync detected in warm sync response, exiting");
      osafassert(0);
@@ -2982,21 +3029,11 @@ static uint32_t dec_node_failover_state(AVD_CL_CB *cb, NCS_MBCSV_CB_DEC *dec) {
    osaf_decode_uint32(&dec->i_uba,
                       reinterpret_cast<uint32_t *>(&state));
- auto failed_node = cb->failover_list.find(node->node_info.nodeId);
-  if (failed_node != cb->failover_list.end()) {
-    failed_node->second->SetState(state);
-  } else {
-    LOG_NO("Node '%s' not found in failover_list. Create new entry",
-            node->node_name.c_str());
-    auto new_node = std::make_shared<NodeStateMachine>(cb,
-      node->node_info.nodeId);
-    // node must be added to failover_list before SetState() is called.
-    // If the state is 'end', then it will be deleted by SetState().
-    // Otherwise, we will leave a node in 'End' state mistakenly in
-    // failover_list.
-    cb->failover_list[node->node_info.nodeId] = new_node;
-    new_node->SetState(state);
+  set_node_failover_state(cb, node->node_info.nodeId, state);
+
+  if (dec->i_peer_version >= AVD_MBCSV_SUB_PART_VERSION_10) {
+    cb->async_updt_cnt.failover_updt++;
    }
return NCSCC_RC_SUCCESS;
-}
\ No newline at end of file
+}
diff --git a/src/amf/amfd/ckpt_edu.cc b/src/amf/amfd/ckpt_edu.cc
index 5564c66..570d8bd 100644
--- a/src/amf/amfd/ckpt_edu.cc
+++ b/src/amf/amfd/ckpt_edu.cc
@@ -119,6 +119,8 @@ uint32_t avsv_edp_ckpt_msg_async_updt_cnt(EDU_HDL *hdl, EDU_TKN *edu_tkn,
         (long)&((AVSV_ASYNC_UPDT_CNT *)0)->si_trans_updt, 0, nullptr},
        {EDU_EXEC, ncs_edp_uns32, 0, 0, 0,
         (long)&((AVSV_ASYNC_UPDT_CNT *)0)->ng_updt, 0, nullptr},
+      {EDU_EXEC, ncs_edp_uns32, 0, 0, 0,
+       (long)&((AVSV_ASYNC_UPDT_CNT *)0)->failover_updt, 0, nullptr},
{EDU_END, 0, 0, 0, 0, 0, 0, nullptr},
    };
diff --git a/src/amf/amfd/ckpt_enc.cc b/src/amf/amfd/ckpt_enc.cc
index 0e675ae..2ca98f8 100644
--- a/src/amf/amfd/ckpt_enc.cc
+++ b/src/amf/amfd/ckpt_enc.cc
@@ -265,6 +265,9 @@ void encode_node_config(NCS_UBAID *ub, const AVD_AVND *avnd,
    osaf_encode_uint32(ub, AVSV_AVND_CARD_SYS_CON);
    osaf_encode_uint32(ub, avnd->rcv_msg_id);
    osaf_encode_uint32(ub, avnd->snd_msg_id);
+  if (peer_version >= AVD_MBCSV_SUB_PART_VERSION_10) {
+    osaf_encode_uint32(ub, avnd->failover_state);
+  }
  }
/****************************************************************************\
@@ -616,7 +619,7 @@ void encode_siass(NCS_UBAID *ub, const AVD_SU_SI_REL *susi,
      osaf_encode_bool(ub, static_cast<bool>(susi->csi_add_rem));
      osaf_encode_sanamet_o2(ub, susi->comp_name.c_str());
      osaf_encode_sanamet_o2(ub, susi->csi_name.c_str());
-  };
+  }
  }
/****************************************************************************\
diff --git a/src/amf/amfd/node.h b/src/amf/amfd/node.h
index dbe48dc..097f54b 100644
--- a/src/amf/amfd/node.h
+++ b/src/amf/amfd/node.h
@@ -115,6 +115,9 @@ class AVD_AVND {
                                * Checkpointing - Sent independent update
                                */
+ // used for cold sync only
+  uint32_t failover_state{NodeState::NodeStates::kUndefined};
+
    std::vector<AVD_SU *> list_of_ncs_su; /* the list of NCS service units on
                                           * this node.
                                           */
diff --git a/src/amf/amfd/node_state_machine.cc b/src/amf/amfd/node_state_machine.cc
index 4653f79..efe2085 100644
--- a/src/amf/amfd/node_state_machine.cc
+++ b/src/amf/amfd/node_state_machine.cc
@@ -43,6 +43,8 @@ void NodeStateMachine::SetState(std::shared_ptr<NodeState> state) {
    AVD_AVND *node = avd_node_find_nodeid(node_id_);
    osafassert(node != nullptr);
    m_AVSV_SEND_CKPT_UPDT_ASYNC_UPDT(cb_, node, AVSV_CKPT_NODE_FAILOVER_STATE);
+  // used for cold sync
+  node->failover_state = state_->GetInt();
if (state->GetInt() == NodeState::kEnd) {
      cb_->failover_list.erase(node_id_);
diff --git a/src/amf/amfd/util.cc b/src/amf/amfd/util.cc
index 0dc3e99..9daa10a 100644
--- a/src/amf/amfd/util.cc
+++ b/src/amf/amfd/util.cc
@@ -1359,6 +1359,7 @@ int amfd_file_dump(const char *filename) {
    fprintf(f, "  compcstype_updt:%d\n", 
avd_cb->async_updt_cnt.compcstype_updt);
    fprintf(f, "  si_trans_updt:%d\n", avd_cb->async_updt_cnt.si_trans_updt);
    fprintf(f, "  ng_updt:%d\n", avd_cb->async_updt_cnt.ng_updt);
+  fprintf(f, "  failover_updt:%d\n", avd_cb->async_updt_cnt.failover_updt);
fprintf(f, "nodes:\n");
    for (const auto &value : *node_id_db) {


_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to