During SC failover, a message received by the ACTIVE AMFD may not be
checkpointed to the AMFD on the STANDBY SC. However, AMFND still
processes the ack for that message and removes it from its queue.
When the STANDBY SC takes over as ACTIVE, the message ids of AMFD
and AMFND on the new ACTIVE no longer match. As a consequence,
CLM track start cannot be invoked to update the cluster member
nodes if those nodes were rebooted.

In this case, AMFND needs to reboot the node automatically in order
to recover.
---
 src/amf/amfnd/verify.cc | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/src/amf/amfnd/verify.cc b/src/amf/amfnd/verify.cc
index 5726ad9..ddb1d15 100644
--- a/src/amf/amfnd/verify.cc
+++ b/src/amf/amfnd/verify.cc
@@ -116,12 +116,14 @@ uint32_t avnd_evt_avd_verify_evh(AVND_CB *cb, AVND_EVT *evt) {
       avnd_diq_rec_del(cb, rec);
       continue;
     } else {
+      if ((rcv_id + 1) == (*((uint32_t *)(&rec->msg.info.avd->msg_info))) &&
+          (msg_found == false)) {
+        msg_found = true;
+      }
       avnd_diq_rec_send(cb, rec);
 
       TRACE_1("AVND record %u sent, upon fail-over",
               *((uint32_t *)(&rec->msg.info.avd->msg_info)));
-
-      msg_found = true;
     }
     ++iter;
   }
@@ -129,9 +131,12 @@ uint32_t avnd_evt_avd_verify_evh(AVND_CB *cb, AVND_EVT *evt) {
   if ((cb->snd_msg_id != info->rcv_id_cnt) && (msg_found == false)) {
     /* Log error, seems to be some problem.*/
     LOG_EM(
-        "AVND record not found, after failover, snd_msg_id = %u, receive id = 
%u",
-        cb->snd_msg_id, info->rcv_id_cnt);
-    return NCSCC_RC_FAILURE;
+        "AVND record not found for msg id = %u", (rcv_id + 1));
+    opensaf_reboot(
+        avnd_cb->node_info.nodeId,
+        osaf_extended_name_borrow(&avnd_cb->node_info.executionEnvironment),
+        "AVND record not found, after failover");
+    exit(0);
   }
 
   /*
-- 
2.7.4



_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to