osaf/services/infrastructure/dtms/config/dtmd.conf | 12 ++++++++++
osaf/services/infrastructure/dtms/dtm/dtm_node_sockets.c | 11 +++++++++
osaf/services/infrastructure/dtms/dtm/dtm_read_config.c | 19 +++++++++++++++-
osaf/services/infrastructure/dtms/include/dtm.h | 4 +++
osaf/services/infrastructure/dtms/include/dtm_cb.h | 1 +
5 files changed, 46 insertions(+), 1 deletions(-)
Since Linux kernel above and equal 3.18, TCP sockets have an option called
TCP_USER_TIMEOUT.
TCP_USER_TIMEOUT is a TCP level socket option that takes an unsigned int,
when grater than 0, to specify the maximum amount of time in ms that transmitted
data may remain unacknowledged before TCP will forcefully close the
corresponding connection and return ETIMEDOUT to the application. If
0 is given, TCP will continue to use the system default.
Increasing the TCP_USER_TIMEOUT allows a TCP connection to survive extended
periods without end-to-end connectivity.
Decreasing the user timeouts allows applications to fail fast if so desired.
Otherwise it may take upto 20 minutes with the current system
defaults in a normal WAN environment.
This option, TCP_USER_TIMEOUT will override keep-alive
to determine when to close a connection due to keep-alive failure.
Defaulted to 1.5 sec to match other transport protocols (TIPC) supported
Opensaf.
Try to tune & test the DTM_TCP_USER_TIMEOUT=1500 to higher and lower value in
dtmd.conf
diff --git a/osaf/services/infrastructure/dtms/config/dtmd.conf
b/osaf/services/infrastructure/dtms/config/dtmd.conf
--- a/osaf/services/infrastructure/dtms/config/dtmd.conf
+++ b/osaf/services/infrastructure/dtms/config/dtmd.conf
@@ -73,6 +73,18 @@ DTM_TCP_KEEPALIVE_INTVL=1
DTM_TCP_KEEPALIVE_PROBES=2
#
+# Since Linux kernel >= 3.18, TCP sockets have an option called
TCP_USER_TIMEOUT.
+# Increasing the TCP_USER_TIMEOUT allows a TCP connection to survive extended
+# periods without end-to-end connectivity.
+# Decreasing the user timeouts allows applications to "fail fast" if so
desired.
+# Otherwise it may take upto 20 minutes with the current system
+# defaults in a normal WAN environment.
+# This option, TCP_USER_TIMEOUT will override keepalive
+# to determine when to close a connection due to keepalive failure.
+# kept to 1.5 sec to match other transport protocols supported Opensaf
+DTM_TCP_USER_TIMEOUT=1500
+
+#
#Used to Set the dtm internode & intranode socket SO_SNDBUF/SO_RCVBUF buffer
in bytes.
#The kernel doubles this value (to allow space for bookkeeping over-head)
#when it is set using setsockopt(),and this doubled value is returned by
diff --git a/osaf/services/infrastructure/dtms/dtm/dtm_node_sockets.c
b/osaf/services/infrastructure/dtms/dtm/dtm_node_sockets.c
--- a/osaf/services/infrastructure/dtms/dtm/dtm_node_sockets.c
+++ b/osaf/services/infrastructure/dtms/dtm/dtm_node_sockets.c
@@ -74,6 +74,7 @@ static uint32_t set_keepalive(DTM_INTERN
TRACE_ENTER();
int comm_keepidle_time, comm_keepalive_intvl, comm_keepalive_probes;
+ int comm_user_timeout;
int so_keepalive;
int smode = 1;
@@ -82,6 +83,7 @@ static uint32_t set_keepalive(DTM_INTERN
comm_keepidle_time = dtms_cb->comm_keepidle_time;
comm_keepalive_intvl = dtms_cb->comm_keepalive_intvl;
comm_keepalive_probes = dtms_cb->comm_keepalive_probes;
+ comm_user_timeout = dtms_cb->comm_user_timeout;
if (so_keepalive == true) {
socklen_t optlen;
@@ -117,6 +119,15 @@ static uint32_t set_keepalive(DTM_INTERN
TRACE_LEAVE2("rc :%d", NCSCC_RC_FAILURE);
return NCSCC_RC_FAILURE;
}
+
+ /* Set TCP_USER_TIMEOUT timeout in milliseconds [ms] */
+ optlen = sizeof(comm_user_timeout);
+ if (setsockopt(sock_desc, SOL_TCP, TCP_USER_TIMEOUT,
&comm_user_timeout, optlen) < 0) {
+ LOG_ER("DTM :setsockopt TCP_USER_TIMEOUT failed err
:%s ", strerror(errno));
+ TRACE_LEAVE2("rc :%d", NCSCC_RC_FAILURE);
+ return NCSCC_RC_FAILURE;
+ }
+
}
if ((setsockopt(sock_desc, SOL_SOCKET, SO_REUSEADDR, (void *)&smode,
sizeof(smode)) == -1)) {
diff --git a/osaf/services/infrastructure/dtms/dtm/dtm_read_config.c
b/osaf/services/infrastructure/dtms/dtm/dtm_read_config.c
--- a/osaf/services/infrastructure/dtms/dtm/dtm_read_config.c
+++ b/osaf/services/infrastructure/dtms/dtm/dtm_read_config.c
@@ -39,6 +39,7 @@ extern uint32_t intranode_max_processes;
#define KEEPALIVE_PROBES 9
#define DIS_TIME_OUT 5
#define BCAST_FRE 250
+#define USER_TIMEOUT 1500 // 1.5 sec to match other transport
const char *IN6ADDR_LINK_LOCAL = "ff02::1"; /* IPv6, Scope:Link
multicast address */
const char *IN6ADDR_LINK_GLOBAL = "ff0e::1"; /* IPv6, Scope:Global
multicast address */
@@ -100,7 +101,9 @@ void dtm_print_config(DTM_INTERNODE_CB *
TRACE(" %d", config->comm_keepalive_intvl);
TRACE(" COMM_KEEPALIVE_PROBES: ");
TRACE(" %d", config->comm_keepalive_probes);
- TRACE(" DTM_INI_DIS_TIMEOUT_SECS: ");
+ TRACE(" COMM_TCP_USER_TIMEOUT: ");
+ TRACE(" %d", config->comm_user_timeout);
+ TRACE(" DTM_INI_DIS_TIMEOUT_SECS: ");
TRACE(" %d", config->initial_dis_timeout);
TRACE(" DTM_BCAST_FRE_MSECS: ");
TRACE(" %" PRId64 "", config->bcast_msg_freq);
@@ -254,6 +257,7 @@ int dtm_read_config(DTM_INTERNODE_CB * c
config->comm_keepidle_time = KEEPIDLE_TIME;
config->comm_keepalive_intvl = KEEPALIVE_INTVL;
config->comm_keepalive_probes = KEEPALIVE_PROBES;
+ config->comm_keepidle_time = USER_TIMEOUT;
config->i_addr_family = DTM_IP_ADDR_TYPE_IPV4;
config->bcast_msg_freq = BCAST_FRE;
config->initial_dis_timeout = DIS_TIME_OUT;
@@ -475,6 +479,19 @@ int dtm_read_config(DTM_INTERNODE_CB * c
tag_len = 0;
}
+ if (strncmp(line, "DTM_TCP_USER_TIMEOUT=",
strlen("DTM_TCP_USER_TIMEOUT=")) == 0) {
+ tag_len = strlen("DTM_TCP_USER_TIMEOUT=");
+ config->comm_user_timeout =
atoi(&line[tag_len]);
+ if (config->comm_user_timeout < 1) {
+ LOG_ER("DTM:comm_user_timeout must be a
positive integer");
+ fclose(dtm_conf_file);
+ return -1;
+ }
+
+ tag = 0;
+ tag_len = 0;
+
+ }
if (strncmp(line, "DTM_SOCK_SND_RCV_BUF_SIZE=",
strlen("DTM_SOCK_SND_RCV_BUF_SIZE=")) == 0) {
tag_len = strlen("DTM_SOCK_SND_RCV_BUF_SIZE=");
uint32_t sndbuf_size = 0; /* Send buffer size */
diff --git a/osaf/services/infrastructure/dtms/include/dtm.h
b/osaf/services/infrastructure/dtms/include/dtm.h
--- a/osaf/services/infrastructure/dtms/include/dtm.h
+++ b/osaf/services/infrastructure/dtms/include/dtm.h
@@ -32,6 +32,10 @@
extern DTM_INTERNODE_CB *dtms_gl_cb;
extern uint8_t initial_discovery_phase;
+#ifndef TCP_USER_TIMEOUT
+#define TCP_USER_TIMEOUT 18
+#endif
+
#define BMCAST_MSG_LEN ( sizeof(uint16_t) + sizeof(uint32_t) +
sizeof(uint32_t) + sizeof(uint16_t)
#define m_NODE_DISCOVERY_TASKNAME "NODE_DISCOVERY"
diff --git a/osaf/services/infrastructure/dtms/include/dtm_cb.h
b/osaf/services/infrastructure/dtms/include/dtm_cb.h
--- a/osaf/services/infrastructure/dtms/include/dtm_cb.h
+++ b/osaf/services/infrastructure/dtms/include/dtm_cb.h
@@ -89,6 +89,7 @@ typedef struct dtm_internode_cb {
int comm_keepidle_time;
int comm_keepalive_intvl;
int comm_keepalive_probes;
+ int comm_user_timeout; // tcp socket user timeout in milliseconds [ms]
int32_t sock_sndbuf_size; /* The value of SO_SNDBUF */
int32_t sock_rcvbuf_size; /* The value of SO_RCVBUF */
SYSF_MBX mbx;
------------------------------------------------------------------------------
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel