ack

On 22/2/22 12:34 pm, thang.d.nguyen wrote:
During SC failover, a message sent by the ACTIVE AMFD may not be
checkpointed to the AMFD on the STANDBY SC, yet the AMFND still
increments its receive/send msg id counters. When the STANDBY SC then
takes over as ACTIVE, the msg ids of the AMFND and the new active AMFD
no longer match. The solution is to realign the msg id counters between
AMFD and AMFND in this case.
---
  src/amf/amfnd/avnd_cb.h |  1 +
  src/amf/amfnd/di.cc     | 22 +++++++++++++++++++---
  src/amf/amfnd/main.cc   |  2 ++
  src/amf/amfnd/verify.cc | 28 +++++++++++++++++++---------
  4 files changed, 41 insertions(+), 12 deletions(-)

diff --git a/src/amf/amfnd/avnd_cb.h b/src/amf/amfnd/avnd_cb.h
index 8af5e5fe1..a8241b965 100644
--- a/src/amf/amfnd/avnd_cb.h
+++ b/src/amf/amfnd/avnd_cb.h
@@ -96,6 +96,7 @@ typedef struct avnd_cb_tag {
    uint32_t rcv_msg_id; /* Message ID of the last message received */
    /* AvD messaging params (retransmit list etc.) */
    uint32_t snd_msg_id; /* send msg id */
+  uint32_t active_ack_msg_id;  // msg id acked by active
/** List of messages sent to director but not yet acked.
     * Messages are removed when acked with the ACK message.
diff --git a/src/amf/amfnd/di.cc b/src/amf/amfnd/di.cc
index 40229438d..d1d83bab6 100644
--- a/src/amf/amfnd/di.cc
+++ b/src/amf/amfnd/di.cc
@@ -819,6 +819,7 @@ uint32_t avnd_evt_mds_avd_dn_evh(AVND_CB *cb, AVND_EVT 
*evt) {
    // reset msg_id counter
    cb->rcv_msg_id = 0;
    cb->snd_msg_id = 0;
+  cb->active_ack_msg_id = 0;
//Inform AMFA about SCs absence now.
    avnd_send_sc_status_message(OSAF_AMF_SC_ABSENT);
@@ -1260,10 +1261,23 @@ uint32_t avnd_di_ack_nack_msg_send(AVND_CB *cb, 
uint32_t rcv_id,
    msg.info.avd->msg_info.n2d_ack_nack_info.msg_id = (cb->snd_msg_id + 1);
    msg.info.avd->msg_info.n2d_ack_nack_info.node_id = cb->node_info.nodeId;
- if (rcv_id != cb->rcv_msg_id)
-    msg.info.avd->msg_info.n2d_ack_nack_info.ack = false;
-  else
+  if (rcv_id != cb->rcv_msg_id) {
+    LOG_WA("Mismatch msg id, AVD send ID count: %u, "
+          "AVND receive ID count: %u", rcv_id, cb->rcv_msg_id);
+    // During SC failover, a message sent by the ACTIVE AMFD may not
+    // be checkpointed to the AMFD on the STANDBY SC, but the AMFND still
+    // receives the msg id. When the STANDBY SC becomes ACTIVE, the msg
+    // ids of AMFD and AMFND mismatch: the AVND receive ID count is
+    // greater than the AVD send ID count. Should respond with ack(true).
+    if (cb->rcv_msg_id > rcv_id) {
+      cb->rcv_msg_id = rcv_id;
+      msg.info.avd->msg_info.n2d_ack_nack_info.ack = true;
+    } else {
+      msg.info.avd->msg_info.n2d_ack_nack_info.ack = false;
+    }
+  } else {
      msg.info.avd->msg_info.n2d_ack_nack_info.ack = true;
+  }
TRACE_1("MsgId=%u,ACK=%u", msg.info.avd->msg_info.n2d_ack_nack_info.msg_id,
            msg.info.avd->msg_info.n2d_ack_nack_info.ack);
@@ -1363,6 +1377,8 @@ uint32_t avnd_di_node_down_msg_send(AVND_CB *cb)
  void avnd_di_msg_ack_process(AVND_CB *cb, uint32_t mid) {
    TRACE_ENTER2("%u", mid);
+ cb->active_ack_msg_id = mid;
+
    for (auto iter =  cb->dnd_list.begin(); iter != cb->dnd_list.end(); ++iter) 
{
      auto rec = *iter;
      osafassert(rec->msg.type == AVND_MSG_AVD);
diff --git a/src/amf/amfnd/main.cc b/src/amf/amfnd/main.cc
index 265907917..24c2e9b85 100644
--- a/src/amf/amfnd/main.cc
+++ b/src/amf/amfnd/main.cc
@@ -343,6 +343,8 @@ AVND_CB *avnd_cb_create() {
    cb->is_avd_down = true;
    cb->amfd_sync_required = false;
+ cb->active_ack_msg_id = 0;
+
    // retrieve hydra configuration from IMM
    hydra_config_get(cb);
    cb->sc_absence_tmr.is_active = false;
diff --git a/src/amf/amfnd/verify.cc b/src/amf/amfnd/verify.cc
index e5b1e7793..325d170e7 100644
--- a/src/amf/amfnd/verify.cc
+++ b/src/amf/amfnd/verify.cc
@@ -128,15 +128,25 @@ uint32_t avnd_evt_avd_verify_evh(AVND_CB *cb, AVND_EVT 
*evt) {
    }
if ((cb->snd_msg_id != info->rcv_id_cnt) && (msg_found == false)) {
-    /* Log error, seems to be some problem.*/
-    LOG_EM(
-        "AVND record not found, after failover, snd_msg_id = %u, receive id = 
%u",
-        cb->snd_msg_id, info->rcv_id_cnt);
-    opensaf_reboot(
-        avnd_cb->node_info.nodeId,
-        osaf_extended_name_borrow(&avnd_cb->node_info.executionEnvironment),
-        "AVND record not found, after failover");
-    exit(0);
+    if (cb->snd_msg_id == cb->active_ack_msg_id) {
+      // During SC failover, a message received by the ACTIVE AMFD may not
+      // be checkpointed to the AMFD on the STANDBY SC, but the AMFND still
+      // processes the ack for that message and removes it from the queue.
+      // When the STANDBY SC becomes ACTIVE, the msg ids of AMFD and AMFND
+      // mismatch: the AVND send ID count is greater than the AVD receive
+      // ID count on the new ACTIVE. Should realign.
+      cb->snd_msg_id = info->rcv_id_cnt;
+    } else {
+      /* Log error, seems to be some problem.*/
+      LOG_EM(
+            "AVND record not found, after failover, snd_msg_id = %u, receive id = 
%u",
+            cb->snd_msg_id, info->rcv_id_cnt);
+        opensaf_reboot(
+            avnd_cb->node_info.nodeId,
+            
osaf_extended_name_borrow(&avnd_cb->node_info.executionEnvironment),
+            "AVND record not found, after failover");
+        exit(0);
+    }
    }
/*


_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to