If TCP closes a socket because of a network problem (due to
TCP_USER_TIMEOUT or some other issue), and DTM declares that the node
has exited the cluster, the node never attempts to rejoin the cluster
unless it reboots, because initial discovery broadcasts stop after
5 seconds.

Discovery broadcasts should continue after the initial discovery phase,
at a reduced rate.
---
src/dtm/dtmnd/dtm_cb.h | 1 +
src/dtm/dtmnd/dtm_main.c | 56
+++++++++++++++++++++++++----------------
src/dtm/dtmnd/dtm_read_config.c | 17 +++++++++++++
src/dtm/dtmnd/dtmd.conf | 8 ++++--
4 files changed, 58 insertions(+), 24 deletions(-)
diff --git a/src/dtm/dtmnd/dtm_cb.h b/src/dtm/dtmnd/dtm_cb.h
index f7c48c4..c5b5890 100644
--- a/src/dtm/dtmnd/dtm_cb.h
+++ b/src/dtm/dtmnd/dtm_cb.h
@@ -80,6 +80,7 @@ typedef struct dtm_internode_cb {
DTM_IP_ADDR_TYPE i_addr_family; /* Indicates V4 or V6 */
uint8_t mcast_flag; /* Indicates mcast */
int32_t initial_dis_timeout;
+ int32_t cont_bcast_int;
int64_t bcast_msg_freq;
NCS_PATRICIA_TREE nodeid_tree; /* NODE_DB information of Nodes */
NCS_PATRICIA_TREE comm_sock_tree; /* NODE_DB information of Nodes */
diff --git a/src/dtm/dtmnd/dtm_main.c b/src/dtm/dtmnd/dtm_main.c
index 724ce93..9f6d7cc 100644
--- a/src/dtm/dtmnd/dtm_main.c
+++ b/src/dtm/dtmnd/dtm_main.c
@@ -233,6 +233,30 @@ err:
return rc;
}
+static uint32_t dtm_send_bcast_mcast(DTM_INTERNODE_CB *dtms_cb,
+ void *send_bcast_buffer,
+ size_t bcast_buf_len)
+{
+ uint32_t rc;
+ /* Makes a single send call for the given buffer: multicast when mcast_flag is set, broadcast otherwise. */
+ TRACE_ENTER();
+ /* A failed send is only logged here; rc is still handed back to the caller. */
+ if (dtms_cb->mcast_flag == true) {
+ rc = dtm_dgram_sendto_mcast(dtms_cb, send_bcast_buffer,
bcast_buf_len);
+ if (NCSCC_RC_SUCCESS != rc) {
+ LOG_ER("DTM: dtm_dgram_sendto_mcast Failed rc : %d \n",
rc);
+ }
+ } else {
+ rc = dtm_dgram_sendto_bcast(dtms_cb, send_bcast_buffer,
bcast_buf_len);
+ if (NCSCC_RC_SUCCESS != rc) {
+ LOG_ER("DTM: dtm_dgram_sendto_bcast Failed rc : %d \n",
rc);
+ }
+ }
+ /* rc is the status returned by whichever dtm_dgram_sendto_* call ran above. */
+ TRACE_LEAVE();
+ return rc;
+}
+
/**
* DTM process main function
*
@@ -357,26 +381,7 @@ int main(int argc, char *argv[])
/* Broadcast msg string in datagram to clients every 250 m
* seconds */
- if (dtms_cb->mcast_flag == true) {
-
- rc = dtm_dgram_sendto_mcast(dtms_cb, send_bcast_buffer,
- bcast_buf_len);
- if (NCSCC_RC_SUCCESS != rc) {
- LOG_ER(
- "DTM: dtm_dgram_sendto_mcast Failed rc : %d \n",
- rc);
- }
-
- } else {
-
- rc = dtm_dgram_sendto_bcast(dtms_cb, send_bcast_buffer,
- bcast_buf_len);
- if (NCSCC_RC_SUCCESS != rc) {
- LOG_ER(
- "DTM: dtm_dgram_sendto_bcast Failed rc : %d \n",
- rc);
- }
- }
+ dtm_send_bcast_mcast(dtms_cb, send_bcast_buffer,
bcast_buf_len);
dis_elapsed_time_usec =
dis_elapsed_time_usec + (dtms_cb->bcast_msg_freq * 1000);
@@ -387,8 +392,15 @@ int main(int argc, char *argv[])
/*************************************************************/
initial_discovery_phase = false;
while (1) {
- m_NCS_TASK_SLEEP(0xfffffff0);
- /* m_NCS_TASK_SLEEP(30000); */
+ if (dtms_cb->cont_bcast_int) {
+ m_NCS_TASK_SLEEP(dtms_cb->cont_bcast_int * 1000);
+ /* periodically send a broadcast */
+ dtm_send_bcast_mcast(dtms_cb,
+ send_bcast_buffer,
+ bcast_buf_len);
+ } else {
+ m_NCS_TASK_SLEEP(0xfffffff0);
+ }
}
done1:
LOG_ER("DTM : dtm_destroy_service_discovery_task exiting...");
diff --git a/src/dtm/dtmnd/dtm_read_config.c
b/src/dtm/dtmnd/dtm_read_config.c
index 85f34f4..0db21e8 100644
--- a/src/dtm/dtmnd/dtm_read_config.c
+++ b/src/dtm/dtmnd/dtm_read_config.c
@@ -39,6 +39,7 @@ extern uint32_t intranode_max_processes;
#define KEEPALIVE_PROBES 9
#define DIS_TIME_OUT 5
#define BCAST_FRE 250
+#define CONT_BCAST_INT 30
#define USER_TIMEOUT 1500 // 1.5 sec to match other transport
const char *IN6ADDR_LINK_LOCAL =
@@ -108,6 +109,8 @@ void dtm_print_config(DTM_INTERNODE_CB *config)
TRACE(" %d", config->initial_dis_timeout);
TRACE(" DTM_BCAST_FRE_MSECS: ");
TRACE(" %" PRId64 "", config->bcast_msg_freq);
+ TRACE(" DTM_CONT_BCAST_INT: ");
+ TRACE(" %d", config->cont_bcast_int);
TRACE(" DTM_SOCK_SND_BUF_SIZE: ");
TRACE(" %d", config->sock_sndbuf_size);
TRACE(" DTM_SOCK_RCV_BUF_SIZE: ");
@@ -277,6 +280,7 @@ int dtm_read_config(DTM_INTERNODE_CB *config,
char *dtm_config_file)
config->comm_keepidle_time = USER_TIMEOUT;
config->i_addr_family = DTM_IP_ADDR_TYPE_IPV4;
config->bcast_msg_freq = BCAST_FRE;
+ config->cont_bcast_int = CONT_BCAST_INT;
config->initial_dis_timeout = DIS_TIME_OUT;
config->sock_sndbuf_size = 0;
config->sock_rcvbuf_size = 0;
@@ -431,6 +435,19 @@ int dtm_read_config(DTM_INTERNODE_CB *config,
char *dtm_config_file)
tag = 0;
tag_len = 0;
+
+ }
+ if (strncmp(line, "DTM_CONTINUOUS_BCAST_INT=",
strlen("DTM_CONTINUOUS_BCAST_INT=")) == 0) {
+ tag_len = strlen("DTM_CONTINUOUS_BCAST_INT=");
+ config->cont_bcast_int = atoi(&line[tag_len]);
+ if (config->cont_bcast_int < 0) {
+ LOG_ER("DTM:cont_bcast_int must be 0 or greater");
+ fclose(dtm_conf_file);
+ return -1;
+ }
+
+ tag = 0;
+ tag_len = 0;
}
if (strncmp(line, "DTM_BCAST_FRE_MSECS=",
strlen("DTM_BCAST_FRE_MSECS=")) == 0) {
diff --git a/src/dtm/dtmnd/dtmd.conf b/src/dtm/dtmnd/dtmd.conf
index c93b340..51bebb2 100644
--- a/src/dtm/dtmnd/dtmd.conf
+++ b/src/dtm/dtmnd/dtmd.conf
@@ -38,10 +38,14 @@ DTM_UDP_BCAST_SND_PORT=6800
DTM_UDP_BCAST_REV_PORT=6900
#
-# udp_broadcast_port: The UDP port that the DTMSv listens on
-# Mandatory
+# bcast_freq: interval (in milliseconds) between broadcasts during initial
+# discovery
DTM_BCAST_FRE_MSECS=250
+# cont_bcast_int: interval (in seconds) between broadcasts after initial
+# discovery has completed (0 disables these periodic broadcasts)
+DTM_CONTINUOUS_BCAST_INT=30
+
# dtm_discovery_timeout_secs: This is the time dtmc should be
allowed to wait
# for a service script finish executing. If the service is not
finished
# within this time dtmc will send a timeout message to DTMSv