[devel] [PATCH 2/2] rde: Use broadcast for peer info message [#3263]

2021-05-25 Thread Minh Chau
RDE sends a peer info message only to the peer it detects through a
peer up message. With roaming SCs, when all SCs rejoin after a network
split, every RDE is active. Duplicate-active detection relies on the
peer info message, so it effectively works as a one-on-one detection.
With this mechanism the last SC may go undetected if all the other SCs
are detected as duplicate active and reboot.

This patch broadcasts the peer info message instead, to increase the
chance that each SC receives a peer info message from all other SCs.
---
 src/rde/rded/rde_cb.h    |  2 +-
 src/rde/rded/rde_main.cc | 22 ++++++++++++----------
 src/rde/rded/rde_mds.cc  | 20 +++++++++++---------
 3 files changed, 24 insertions(+), 20 deletions(-)

diff --git a/src/rde/rded/rde_cb.h b/src/rde/rded/rde_cb.h
index 50a0a0d26..b744b7c72 100644
--- a/src/rde/rded/rde_cb.h
+++ b/src/rde/rded/rde_cb.h
@@ -101,7 +101,7 @@ extern uint32_t rde_mds_register();
 extern uint32_t rde_discovery_mds_register();
 extern uint32_t rde_mds_unregister();
 extern uint32_t rde_discovery_mds_unregister();
-extern uint32_t rde_mds_send(rde_msg *msg, MDS_DEST to_dest);
+extern uint32_t rde_mds_broadcast(rde_msg *msg);
 extern uint32_t rde_set_role(PCS_RDA_ROLE role);
 
 #endif  // RDE_RDED_RDE_CB_H_
diff --git a/src/rde/rded/rde_main.cc b/src/rde/rded/rde_main.cc
index e6bd759ec..8ed6b046e 100644
--- a/src/rde/rded/rde_main.cc
+++ b/src/rde/rded/rde_main.cc
@@ -46,7 +46,7 @@
 enum { FD_TERM = 0, FD_AMF = 1, FD_MBX, FD_RDA_SERVER,
FD_SIGHUP, FD_CLIENT_START };
 
-static void SendPeerInfoResp(MDS_DEST mds_dest);
+static void BroadcastPeerInfoResp();
 static void CheckForSplitBrain(const rde_msg *msg);
 
 const char *rde_msg_name[] = {"-",
@@ -105,18 +105,20 @@ static void handle_mbx_event() {
   switch (msg->type) {
 case RDE_MSG_PEER_INFO_REQ:
 case RDE_MSG_PEER_INFO_RESP: {
-  LOG_NO("Got peer info %s from node 0x%x with role %s",
- msg->type == RDE_MSG_PEER_INFO_RESP ? "response" : "request",
- msg->fr_node_id, Role::to_string(msg->info.peer_info.ha_role));
-  CheckForSplitBrain(msg);
-  role->SetPeerState(msg->info.peer_info.ha_role, msg->fr_node_id,
- msg->info.peer_info.promote_pending);
+  if (msg->fr_node_id != own_node_id) {
+LOG_NO("Got peer info %s from node 0x%x with role %s",
+msg->type == RDE_MSG_PEER_INFO_RESP ? "response" : "request",
+msg->fr_node_id, Role::to_string(msg->info.peer_info.ha_role));
+CheckForSplitBrain(msg);
+role->SetPeerState(msg->info.peer_info.ha_role, msg->fr_node_id,
+msg->info.peer_info.promote_pending);
+  }
   break;
 }
 case RDE_MSG_PEER_UP: {
   if (msg->fr_node_id != own_node_id) {
 LOG_NO("Peer up on node 0x%x", msg->fr_node_id);
-SendPeerInfoResp(msg->fr_dest);
+BroadcastPeerInfoResp();
 role->AddPeer(msg->fr_node_id);
   }
   break;
@@ -284,7 +286,7 @@ static void CheckForSplitBrain(const rde_msg *msg) {
   }
 }
 
-static void SendPeerInfoResp(MDS_DEST mds_dest) {
+static void BroadcastPeerInfoResp() {
   RDE_CONTROL_BLOCK *cb = rde_get_control_block();
   rde_msg peer_info_req;
   peer_info_req.type = RDE_MSG_PEER_INFO_RESP;
@@ -294,7 +296,7 @@ static void SendPeerInfoResp(MDS_DEST mds_dest) {
 cb->promote_pending = base::TimespecToMillis(now - cb->promote_start);
   }
   peer_info_req.info.peer_info.promote_pending = cb->promote_pending;
-  rde_mds_send(&peer_info_req, mds_dest);
+  rde_mds_broadcast(&peer_info_req);
 }
 
 /**
diff --git a/src/rde/rded/rde_mds.cc b/src/rde/rded/rde_mds.cc
index a32f54082..4591d1996 100644
--- a/src/rde/rded/rde_mds.cc
+++ b/src/rde/rded/rde_mds.cc
@@ -209,6 +209,8 @@ static uint32_t mds_callback(struct ncsmds_callback_info 
*info) {
   msg = (struct rde_msg *)info->info.receive.i_msg;
   msg->fr_dest = info->info.receive.i_fr_dest;
   msg->fr_node_id = info->info.receive.i_node_id;
+  TRACE("MDS RECEIVE dest: %" PRIx64 ", node ID: %x, msg_type: %d",
+  msg->fr_dest, msg->fr_node_id, msg->type);
   if (ncs_ipc_send(>mbx, reinterpret_cast(
  info->info.receive.i_msg),
NCS_IPC_PRIORITY_NORMAL) != NCSCC_RC_SUCCESS) {
@@ -385,11 +387,11 @@ uint32_t rde_discovery_mds_unregister() {
   return rc;
 }
 
-uint32_t rde_mds_send(struct rde_msg *msg, MDS_DEST to_dest) {
+uint32_t rde_mds_broadcast(struct rde_msg *msg) {
   NCSMDS_INFO info;
   uint32_t rc;
 
-  TRACE("Sending %s to %" PRIx64, rde_msg_name[msg->type], to_dest);
+  TRACE("Sending %s to all rded instances", rde_msg_name[msg->type]);
   memset(&info, 0, sizeof(info));
 
   info.i_mds_hdl = mds_hdl;
@@ -397,21 +399,21 @@ uint32_t rde_mds_send(struct rde_msg *msg, MDS_DEST 
to_dest) {
   info.i_svc_id = NCSMDS_SVC_ID_RDE;
 
   info.info.svc_send.i_msg = msg;
-  info.info.svc_send.i_priority = MDS_SEND_PRIORITY_MEDIUM;
-  info.info.svc_send.i_sendtype = MDS_SENDTYPE_SND;
+  

[devel] [PATCH 2/2] rde: Use broadcast for peer info message [#3263]

2021-05-24 Thread Minh Chau
RDE sends a peer info message only to the peer it detects through a
peer up message. With roaming SCs, when all SCs rejoin after a network
split, every RDE is active. Duplicate-active detection relies on the
peer info message, so it effectively works as a one-on-one detection.
With this mechanism the last SC may go undetected if all the other SCs
are detected as duplicate active and reboot.

This patch broadcasts the peer info message instead, to increase the
chance that each SC receives a peer info message from all other SCs.
---
 src/rde/rded/rde_cb.h    |  2 +-
 src/rde/rded/rde_main.cc | 22 ++++++++++++----------
 src/rde/rded/rde_mds.cc  | 20 +++++++++++---------
 3 files changed, 24 insertions(+), 20 deletions(-)

diff --git a/src/rde/rded/rde_cb.h b/src/rde/rded/rde_cb.h
index 50a0a0d26..b744b7c72 100644
--- a/src/rde/rded/rde_cb.h
+++ b/src/rde/rded/rde_cb.h
@@ -101,7 +101,7 @@ extern uint32_t rde_mds_register();
 extern uint32_t rde_discovery_mds_register();
 extern uint32_t rde_mds_unregister();
 extern uint32_t rde_discovery_mds_unregister();
-extern uint32_t rde_mds_send(rde_msg *msg, MDS_DEST to_dest);
+extern uint32_t rde_mds_broadcast(rde_msg *msg);
 extern uint32_t rde_set_role(PCS_RDA_ROLE role);
 
 #endif  // RDE_RDED_RDE_CB_H_
diff --git a/src/rde/rded/rde_main.cc b/src/rde/rded/rde_main.cc
index e6bd759ec..8ed6b046e 100644
--- a/src/rde/rded/rde_main.cc
+++ b/src/rde/rded/rde_main.cc
@@ -46,7 +46,7 @@
 enum { FD_TERM = 0, FD_AMF = 1, FD_MBX, FD_RDA_SERVER,
FD_SIGHUP, FD_CLIENT_START };
 
-static void SendPeerInfoResp(MDS_DEST mds_dest);
+static void BroadcastPeerInfoResp();
 static void CheckForSplitBrain(const rde_msg *msg);
 
 const char *rde_msg_name[] = {"-",
@@ -105,18 +105,20 @@ static void handle_mbx_event() {
   switch (msg->type) {
 case RDE_MSG_PEER_INFO_REQ:
 case RDE_MSG_PEER_INFO_RESP: {
-  LOG_NO("Got peer info %s from node 0x%x with role %s",
- msg->type == RDE_MSG_PEER_INFO_RESP ? "response" : "request",
- msg->fr_node_id, Role::to_string(msg->info.peer_info.ha_role));
-  CheckForSplitBrain(msg);
-  role->SetPeerState(msg->info.peer_info.ha_role, msg->fr_node_id,
- msg->info.peer_info.promote_pending);
+  if (msg->fr_node_id != own_node_id) {
+LOG_NO("Got peer info %s from node 0x%x with role %s",
+msg->type == RDE_MSG_PEER_INFO_RESP ? "response" : "request",
+msg->fr_node_id, Role::to_string(msg->info.peer_info.ha_role));
+CheckForSplitBrain(msg);
+role->SetPeerState(msg->info.peer_info.ha_role, msg->fr_node_id,
+msg->info.peer_info.promote_pending);
+  }
   break;
 }
 case RDE_MSG_PEER_UP: {
   if (msg->fr_node_id != own_node_id) {
 LOG_NO("Peer up on node 0x%x", msg->fr_node_id);
-SendPeerInfoResp(msg->fr_dest);
+BroadcastPeerInfoResp();
 role->AddPeer(msg->fr_node_id);
   }
   break;
@@ -284,7 +286,7 @@ static void CheckForSplitBrain(const rde_msg *msg) {
   }
 }
 
-static void SendPeerInfoResp(MDS_DEST mds_dest) {
+static void BroadcastPeerInfoResp() {
   RDE_CONTROL_BLOCK *cb = rde_get_control_block();
   rde_msg peer_info_req;
   peer_info_req.type = RDE_MSG_PEER_INFO_RESP;
@@ -294,7 +296,7 @@ static void SendPeerInfoResp(MDS_DEST mds_dest) {
 cb->promote_pending = base::TimespecToMillis(now - cb->promote_start);
   }
   peer_info_req.info.peer_info.promote_pending = cb->promote_pending;
-  rde_mds_send(&peer_info_req, mds_dest);
+  rde_mds_broadcast(&peer_info_req);
 }
 
 /**
diff --git a/src/rde/rded/rde_mds.cc b/src/rde/rded/rde_mds.cc
index a32f54082..4591d1996 100644
--- a/src/rde/rded/rde_mds.cc
+++ b/src/rde/rded/rde_mds.cc
@@ -209,6 +209,8 @@ static uint32_t mds_callback(struct ncsmds_callback_info 
*info) {
   msg = (struct rde_msg *)info->info.receive.i_msg;
   msg->fr_dest = info->info.receive.i_fr_dest;
   msg->fr_node_id = info->info.receive.i_node_id;
+  TRACE("MDS RECEIVE dest: %" PRIx64 ", node ID: %x, msg_type: %d",
+  msg->fr_dest, msg->fr_node_id, msg->type);
   if (ncs_ipc_send(>mbx, reinterpret_cast(
  info->info.receive.i_msg),
NCS_IPC_PRIORITY_NORMAL) != NCSCC_RC_SUCCESS) {
@@ -385,11 +387,11 @@ uint32_t rde_discovery_mds_unregister() {
   return rc;
 }
 
-uint32_t rde_mds_send(struct rde_msg *msg, MDS_DEST to_dest) {
+uint32_t rde_mds_broadcast(struct rde_msg *msg) {
   NCSMDS_INFO info;
   uint32_t rc;
 
-  TRACE("Sending %s to %" PRIx64, rde_msg_name[msg->type], to_dest);
+  TRACE("Sending %s to all rded instances", rde_msg_name[msg->type]);
   memset(&info, 0, sizeof(info));
 
   info.i_mds_hdl = mds_hdl;
@@ -397,21 +399,21 @@ uint32_t rde_mds_send(struct rde_msg *msg, MDS_DEST 
to_dest) {
   info.i_svc_id = NCSMDS_SVC_ID_RDE;
 
   info.info.svc_send.i_msg = msg;
-  info.info.svc_send.i_priority = MDS_SEND_PRIORITY_MEDIUM;
-  info.info.svc_send.i_sendtype = MDS_SENDTYPE_SND;
+