During SC failover, a message received by the ACTIVE AMFD cannot be
checkpointed to the AMFD on the STANDBY SC. However, the AMFND still
processes the ack for that message and removes it from its queue.
When the STANDBY SC takes over as ACTIVE, the message ids of AMFD and
AMFND mismatch on the new ACTIVE. As a consequence, CLM track start cannot
be invoked to update the cluster member nodes if those nodes were rebooted.
Reboot the node to recover from this issue.
---
src/amf/amfnd/verify.cc | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/src/amf/amfnd/verify.cc b/src/amf/amfnd/verify.cc
index 5726ad9..e5b1e77 100644
--- a/src/amf/amfnd/verify.cc
+++ b/src/amf/amfnd/verify.cc
@@ -116,12 +116,13 @@ uint32_t avnd_evt_avd_verify_evh(AVND_CB *cb, AVND_EVT
*evt) {
avnd_diq_rec_del(cb, rec);
continue;
} else {
+ if ((rcv_id + 1) == (*((uint32_t *)(&rec->msg.info.avd->msg_info)))) {
+ msg_found = true;
+ }
avnd_diq_rec_send(cb, rec);
TRACE_1("AVND record %u sent, upon fail-over",
*((uint32_t *)(&rec->msg.info.avd->msg_info)));
-
- msg_found = true;
}
++iter;
}
@@ -131,7 +132,11 @@ uint32_t avnd_evt_avd_verify_evh(AVND_CB *cb, AVND_EVT
*evt) {
LOG_EM(
"AVND record not found, after failover, snd_msg_id = %u, receive id =
%u",
cb->snd_msg_id, info->rcv_id_cnt);
- return NCSCC_RC_FAILURE;
+ opensaf_reboot(
+ avnd_cb->node_info.nodeId,
+ osaf_extended_name_borrow(&avnd_cb->node_info.executionEnvironment),
+ "AVND record not found, after failover");
+ exit(0);
}
/*
--
2.7.4
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel