Hi Alex Jones,

I don't think you need an additional configuration option (CONT_BCAST_INT) to continue DTM discovery. You can reuse the existing DTM_INI_DIS_TIMEOUT_SECS=5 (dtm_discovery_timeout_secs) setting itself,
just by adding an additional comment saying:

"# Comment the next line (DTM_INI_DIS_TIMEOUT_SECS) to enable continuing dtm discovery"

==========================================================================================
# dtm_discovery_timeout_secs: This is the time dtmc should be allowed to wait
# for a service script finish executing. If the service is not finished
# within this time dtmc will send a timeout message to DTMSv
# Mandatory
DTM_INI_DIS_TIMEOUT_SECS=5
==========================================================================================

Then, if DTM_INI_DIS_TIMEOUT_SECS is found to be ZERO while reading the configuration, change the logic to continue DTM discovery.

-AVM

On 8/11/2017 8:51 PM, Alex Jones wrote:
If TCP closes a socket (due to TCP_USER_TIMEOUT or some other issue) because
of a network problem, and DTM declares that the node has exited the cluster,
the node never attempts to come back into the cluster if it does not reboot.

Initial discovery broadcasts stop after 5 seconds.

Discovery broadcasts should continue after the initial discovery phase at a
reduced rate.
---
  src/dtm/dtmnd/dtm_cb.h          |  1 +
  src/dtm/dtmnd/dtm_main.c        | 56 +++++++++++++++++++++++++----------------
  src/dtm/dtmnd/dtm_read_config.c | 17 +++++++++++++
  src/dtm/dtmnd/dtmd.conf         |  8 ++++--
  4 files changed, 58 insertions(+), 24 deletions(-)

diff --git a/src/dtm/dtmnd/dtm_cb.h b/src/dtm/dtmnd/dtm_cb.h
index f7c48c4..c5b5890 100644
--- a/src/dtm/dtmnd/dtm_cb.h
+++ b/src/dtm/dtmnd/dtm_cb.h
@@ -80,6 +80,7 @@ typedef struct dtm_internode_cb {
    DTM_IP_ADDR_TYPE i_addr_family; /* Indicates V4 or V6 */
    uint8_t mcast_flag;             /* Indicates mcast */
    int32_t initial_dis_timeout;
+  int32_t cont_bcast_int;
    int64_t bcast_msg_freq;
    NCS_PATRICIA_TREE nodeid_tree;    /* NODE_DB information of Nodes */
    NCS_PATRICIA_TREE comm_sock_tree; /* NODE_DB information of Nodes */
diff --git a/src/dtm/dtmnd/dtm_main.c b/src/dtm/dtmnd/dtm_main.c
index 724ce93..9f6d7cc 100644
--- a/src/dtm/dtmnd/dtm_main.c
+++ b/src/dtm/dtmnd/dtm_main.c
@@ -233,6 +233,30 @@ err:
        return rc;
  }
+static uint32_t dtm_send_bcast_mcast(DTM_INTERNODE_CB *dtms_cb,
+                                       void *send_bcast_buffer,
+                                       size_t bcast_buf_len)
+{
+       uint32_t rc;
+
+       TRACE_ENTER();
+
+       if (dtms_cb->mcast_flag == true) {
+               rc = dtm_dgram_sendto_mcast(dtms_cb, send_bcast_buffer, 
bcast_buf_len);
+               if (NCSCC_RC_SUCCESS != rc) {
+                       LOG_ER("DTM: dtm_dgram_sendto_mcast Failed rc : %d \n", 
rc);
+               }
+       } else {
+               rc = dtm_dgram_sendto_bcast(dtms_cb, send_bcast_buffer, 
bcast_buf_len);
+               if (NCSCC_RC_SUCCESS != rc) {
+                       LOG_ER("DTM: dtm_dgram_sendto_bcast Failed rc : %d \n", 
rc);
+               }
+       }
+
+       TRACE_LEAVE();
+       return rc;
+}
+
  /**
   *  DTM process main function
   *
@@ -357,26 +381,7 @@ int main(int argc, char *argv[])
/* Broadcast msg string in datagram to clients every 250 m
                 * seconds */
-               if (dtms_cb->mcast_flag == true) {
-
-                       rc = dtm_dgram_sendto_mcast(dtms_cb, send_bcast_buffer,
-                                                   bcast_buf_len);
-                       if (NCSCC_RC_SUCCESS != rc) {
-                               LOG_ER(
-                                   "DTM: dtm_dgram_sendto_mcast Failed rc : %d 
\n",
-                                   rc);
-                       }
-
-               } else {
-
-                       rc = dtm_dgram_sendto_bcast(dtms_cb, send_bcast_buffer,
-                                                   bcast_buf_len);
-                       if (NCSCC_RC_SUCCESS != rc) {
-                               LOG_ER(
-                                   "DTM: dtm_dgram_sendto_bcast Failed rc : %d 
\n",
-                                   rc);
-                       }
-               }
+               dtm_send_bcast_mcast(dtms_cb, send_bcast_buffer, bcast_buf_len);
dis_elapsed_time_usec =
                    dis_elapsed_time_usec + (dtms_cb->bcast_msg_freq * 1000);
@@ -387,8 +392,15 @@ int main(int argc, char *argv[])
        /*************************************************************/
        initial_discovery_phase = false;
        while (1) {
-               m_NCS_TASK_SLEEP(0xfffffff0);
-               /* m_NCS_TASK_SLEEP(30000); */
+               if (dtms_cb->cont_bcast_int) {
+                       m_NCS_TASK_SLEEP(dtms_cb->cont_bcast_int * 1000);
+                       /* periodically send a broadcast */
+                       dtm_send_bcast_mcast(dtms_cb,
+                                               send_bcast_buffer,
+                                               bcast_buf_len);
+               } else {
+                       m_NCS_TASK_SLEEP(0xfffffff0);
+               }
        }
  done1:
        LOG_ER("DTM : dtm_destroy_service_discovery_task exiting...");
diff --git a/src/dtm/dtmnd/dtm_read_config.c b/src/dtm/dtmnd/dtm_read_config.c
index 85f34f4..0db21e8 100644
--- a/src/dtm/dtmnd/dtm_read_config.c
+++ b/src/dtm/dtmnd/dtm_read_config.c
@@ -39,6 +39,7 @@ extern uint32_t intranode_max_processes;
  #define KEEPALIVE_PROBES 9
  #define DIS_TIME_OUT 5
  #define BCAST_FRE 250
+#define CONT_BCAST_INT 30
  #define USER_TIMEOUT 1500 // 1.5 sec to match other transport
const char *IN6ADDR_LINK_LOCAL =
@@ -108,6 +109,8 @@ void dtm_print_config(DTM_INTERNODE_CB *config)
        TRACE("  %d", config->initial_dis_timeout);
        TRACE("  DTM_BCAST_FRE_MSECS: ");
        TRACE("  %" PRId64 "", config->bcast_msg_freq);
+       TRACE("  DTM_CONT_BCAST_INT: ");
+       TRACE("  %d", config->cont_bcast_int);
        TRACE("  DTM_SOCK_SND_BUF_SIZE: ");
        TRACE("  %d", config->sock_sndbuf_size);
        TRACE("  DTM_SOCK_RCV_BUF_SIZE: ");
@@ -277,6 +280,7 @@ int dtm_read_config(DTM_INTERNODE_CB *config, char 
*dtm_config_file)
        config->comm_keepidle_time = USER_TIMEOUT;
        config->i_addr_family = DTM_IP_ADDR_TYPE_IPV4;
        config->bcast_msg_freq = BCAST_FRE;
+       config->cont_bcast_int = CONT_BCAST_INT;
        config->initial_dis_timeout = DIS_TIME_OUT;
        config->sock_sndbuf_size = 0;
        config->sock_rcvbuf_size = 0;
@@ -431,6 +435,19 @@ int dtm_read_config(DTM_INTERNODE_CB *config, char 
*dtm_config_file)
tag = 0;
                                tag_len = 0;
+
+                       }
+                       if (strncmp(line, "DTM_CONTINUOUS_BCAST_INT=", 
strlen("DTM_CONTINUOUS_BCAST_INT=")) == 0) {
+                               tag_len = strlen("DTM_CONTINUOUS_BCAST_INT=");
+                               config->cont_bcast_int = atoi(&line[tag_len]);
+                               if (config->cont_bcast_int < 0) {
+                                       LOG_ER("DTM:cont_bcast_int must be 0 or 
greater");
+                                       fclose(dtm_conf_file);
+                                       return -1;
+                               }
+
+                               tag = 0;
+                               tag_len = 0;
                        }
                        if (strncmp(line, "DTM_BCAST_FRE_MSECS=",
                                    strlen("DTM_BCAST_FRE_MSECS=")) == 0) {
diff --git a/src/dtm/dtmnd/dtmd.conf b/src/dtm/dtmnd/dtmd.conf
index c93b340..51bebb2 100644
--- a/src/dtm/dtmnd/dtmd.conf
+++ b/src/dtm/dtmnd/dtmd.conf
@@ -38,10 +38,14 @@ DTM_UDP_BCAST_SND_PORT=6800
  DTM_UDP_BCAST_REV_PORT=6900
#
-# udp_broadcast_port: The UDP port that the DTMSv listens on
-# Mandatory
+# bcast_freq: frequency (in milliseconds) between broadcasts during initial
+# discovery
  DTM_BCAST_FRE_MSECS=250
+# cont_bcast_int: interval (in seconds) between broadcasts after initial
+# discovery has completed
+DTM_CONTINUOUS_BCAST_INT=30
+
  # dtm_discovery_timeout_secs:  This is the time dtmc should be allowed to wait
  # for a service script finish executing. If the service is not finished
  # within this time dtmc will send a timeout message to DTMSv


------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to