During SC failover, a message sent by the ACTIVE AMFD may not be
checkpointed to the AMFD on the STANDBY SC, yet the AMFND still
increases its receive/send msg id count. When the STANDBY SC then takes
over as ACTIVE, the msg id is mismatched b/w the AMFND and the new ACTIVE AMFD.
The solution is to realign the msg id count b/w AMFD and AMFND
in this case.
---
src/amf/amfnd/avnd_cb.h | 1 +
src/amf/amfnd/di.cc | 19 ++++++++++++++++---
src/amf/amfnd/verify.cc | 24 +++++++++++++++++-------
3 files changed, 34 insertions(+), 10 deletions(-)
diff --git a/src/amf/amfnd/avnd_cb.h b/src/amf/amfnd/avnd_cb.h
index 8af5e5fe1..a8241b965 100644
--- a/src/amf/amfnd/avnd_cb.h
+++ b/src/amf/amfnd/avnd_cb.h
@@ -96,6 +96,7 @@ typedef struct avnd_cb_tag {
uint32_t rcv_msg_id; /* Message ID of the last message received */
/* AvD messaging params (retransmit list etc.) */
uint32_t snd_msg_id; /* send msg id */
+ uint32_t active_ack_msg_id; // msg id acked by active
/** List of messages sent to director but not yet acked.
* Messages are removed when acked with the ACK message.
diff --git a/src/amf/amfnd/di.cc b/src/amf/amfnd/di.cc
index 40229438d..1ebf22d65 100644
--- a/src/amf/amfnd/di.cc
+++ b/src/amf/amfnd/di.cc
@@ -1260,10 +1260,23 @@ uint32_t avnd_di_ack_nack_msg_send(AVND_CB *cb,
uint32_t rcv_id,
msg.info.avd->msg_info.n2d_ack_nack_info.msg_id = (cb->snd_msg_id + 1);
msg.info.avd->msg_info.n2d_ack_nack_info.node_id = cb->node_info.nodeId;
- if (rcv_id != cb->rcv_msg_id)
- msg.info.avd->msg_info.n2d_ack_nack_info.ack = false;
- else
+ if (rcv_id != cb->rcv_msg_id) {
+ LOG_WA("Mismatch msg id, AVD send ID count: %u, "
+ "AVND receive ID count: %u", rcv_id, cb->rcv_msg_id);
+ // During SC failover, a message sent by the ACTIVE AMFD may not
+ // be checkpointed to the AMFD on the STANDBY SC, but the AMFND still
+ // counts its msg id. When the STANDBY SC takes over as ACTIVE, the msg
+ // id is mismatched b/w the new ACTIVE AMFD and the AMFND: the AVND receive
+ // ID count is greater than the AVD sent ID count. Should rsp ack(true).
+ if (cb->rcv_msg_id > rcv_id) {
+ cb->rcv_msg_id = rcv_id;
+ msg.info.avd->msg_info.n2d_ack_nack_info.ack = true;
+ } else {
+ msg.info.avd->msg_info.n2d_ack_nack_info.ack = false;
+ }
+ } else {
msg.info.avd->msg_info.n2d_ack_nack_info.ack = true;
+ }
TRACE_1("MsgId=%u,ACK=%u", msg.info.avd->msg_info.n2d_ack_nack_info.msg_id,
msg.info.avd->msg_info.n2d_ack_nack_info.ack);
diff --git a/src/amf/amfnd/verify.cc b/src/amf/amfnd/verify.cc
index e5b1e7793..d6edc8855 100644
--- a/src/amf/amfnd/verify.cc
+++ b/src/amf/amfnd/verify.cc
@@ -128,15 +128,25 @@ uint32_t avnd_evt_avd_verify_evh(AVND_CB *cb, AVND_EVT
*evt) {
}
if ((cb->snd_msg_id != info->rcv_id_cnt) && (msg_found == false)) {
+ if (cb->snd_msg_id == cb->active_ack_msg_id) {
+ // During SC failover, a message received by the ACTIVE AMFD may not
+ // be checkpointed to the AMFD on the STANDBY SC, but the AMFND still
+ // processes the ack for that message and removes it from the queue.
+ // When the STANDBY SC takes over as ACTIVE, the msg id is mismatched b/w
+ // the AMFD and AMFND: the AVND send ID count is greater than the AVD receive
+ // ID count on the new ACTIVE. Should realign.
+ cb->snd_msg_id = info->rcv_id_cnt;
+ } else {
/* Log error, seems to be some problem.*/
LOG_EM(
- "AVND record not found, after failover, snd_msg_id = %u, receive id =
%u",
- cb->snd_msg_id, info->rcv_id_cnt);
- opensaf_reboot(
- avnd_cb->node_info.nodeId,
- osaf_extended_name_borrow(&avnd_cb->node_info.executionEnvironment),
- "AVND record not found, after failover");
- exit(0);
+ "AVND record not found, after failover, snd_msg_id = %u, receive id
= %u",
+ cb->snd_msg_id, info->rcv_id_cnt);
+ opensaf_reboot(
+ avnd_cb->node_info.nodeId,
+ osaf_extended_name_borrow(&avnd_cb->node_info.executionEnvironment),
+ "AVND record not found, after failover");
+ exit(0);
+ }
}
/*
--
2.25.1
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel