- Use a non-blocking connect() when setting up a new socket. - DTM does not exit when connect() fails. --- src/dtm/dtmnd/dtm_node.h | 1 + src/dtm/dtmnd/dtm_node_sockets.cc | 105 +++++++++++++++++++++++++++--- 2 files changed, 97 insertions(+), 9 deletions(-)
diff --git a/src/dtm/dtmnd/dtm_node.h b/src/dtm/dtmnd/dtm_node.h index 82435cc11..aaeba69c7 100644 --- a/src/dtm/dtmnd/dtm_node.h +++ b/src/dtm/dtmnd/dtm_node.h @@ -18,6 +18,7 @@ #ifndef DTM_DTMND_DTM_NODE_H_ #define DTM_DTMND_DTM_NODE_H_ +#define DTM_TCP_TIMEOUT_SECS 10 #include <sys/socket.h> #include <cstddef> diff --git a/src/dtm/dtmnd/dtm_node_sockets.cc b/src/dtm/dtmnd/dtm_node_sockets.cc index 8e1299368..f09b09157 100644 --- a/src/dtm/dtmnd/dtm_node_sockets.cc +++ b/src/dtm/dtmnd/dtm_node_sockets.cc @@ -221,6 +221,88 @@ uint32_t dtm_comm_socket_send(int sock_desc, const void *buffer, return rc; } +/* + * By default TCP timeouts can be very long. This can lead to blocking for a + * very long time waiting on connect(). This function sets the socket to + * non-blocking mode for the connect and returns the socket to blocking mode + * once the connect has been established. + * + * \param sockd socket file descriptor + * \param sin socket address structure + * \param length size of address structure + * + * \return 0 on success, -1 on error, -2 on timeout; exits if fcntl() fails + */ +int non_blocking_connect(int sockd, struct sockaddr *sin, socklen_t length) { + struct pollfd wset; + socklen_t len; + int flags, ret, opt; + + /* Set the socket fd to non-blocking mode */ + if ((flags = fcntl(sockd, F_GETFL, NULL)) < 0) { + LOG_ER("Error fcntl(..., F_GETFL)"); + exit(EXIT_FAILURE); + } + flags |= O_NONBLOCK; + if (fcntl(sockd, F_SETFL, flags) < 0) { + LOG_ER("Error fcntl(..., F_SETFL)"); + exit(EXIT_FAILURE); + } + + /* connect with timeout */ + ret = + connect(sockd, (struct sockaddr *)sin, length); + if (ret < 0) { + if (errno == EINPROGRESS) { + /* poll the fd until we get a connection, timeout, or + * error; a poll() interrupted by a signal is retried */ + while (1) { + wset.fd = sockd; + wset.events = POLLOUT; + + ret = poll(&wset, 1, + DTM_TCP_TIMEOUT_SECS * 1000); + if (ret < 0 && errno != EINTR) { + LOG_ER("Error poll"); + return -1; + + } else if (ret > 0) { + // Socket polled for write + len = sizeof(int); + if (getsockopt(sockd, SOL_SOCKET, SO_ERROR, 
reinterpret_cast<void *>(&opt), + &len) < 0) { + LOG_ER("Error getsockopt(...,SOL_SOCKET,..)"); + return -1; + } + // Check the value returned... + if (opt) { + LOG_ER("Error getsockopt"); + return -1; + } + break; + } else if (ret == 0) { /* Timeout */ + LOG_ER("Timeout in connect()"); + return -2; + } + } + } else { /* Real error returned from connect */ + int err = errno; + LOG_ER("Connect failed (connect()) err :%s", strerror(err)); + return -1; + } + } + + /* Connection has been established switch back to blocking mode */ + flags &= (~O_NONBLOCK); + if (fcntl(sockd, F_SETFL, flags) < 0) { + LOG_ER("Error fcntl"); + exit(EXIT_FAILURE); + } + + return 0; +} + /** * Setup the new communication socket * @@ -235,7 +317,7 @@ int comm_socket_setup_new(DTM_INTERNODE_CB *dtms_cb, sa_family_t ip_addr_type) { int sock_desc = -1, sndbuf_size = dtms_cb->sock_sndbuf_size, rcvbuf_size = dtms_cb->sock_rcvbuf_size; - int err = 0, rv; + int rv; char local_port_str[INET6_ADDRSTRLEN]; struct addrinfo *addr_list; struct addrinfo addr_criteria, *p; /* Criteria for address match */ @@ -359,9 +441,9 @@ int comm_socket_setup_new(DTM_INTERNODE_CB *dtms_cb, } /* Try to connect to the given port */ - if (connect(sock_desc, addr_list->ai_addr, addr_list->ai_addrlen) < 0) { - err = errno; - LOG_ER("DTM :Connect failed (connect()) err :%s", strerror(err)); + if (non_blocking_connect(sock_desc, addr_list->ai_addr, + addr_list->ai_addrlen) < 0) { + LOG_ER("DTM :non_blocking_connect() failed"); close(sock_desc); sock_desc = -1; goto done; @@ -649,12 +731,12 @@ DTM_NODE_DB *dtm_process_connect(DTM_INTERNODE_CB *dtms_cb, uint8_t *data, int sock_desc = comm_socket_setup_new(dtms_cb, node.node_ip, foreign_port, node.i_addr_family); - new_node->comm_socket = sock_desc; - new_node->node_id = node.node_id; - memcpy(new_node->node_ip, node.node_ip, INET6_ADDRSTRLEN); - new_node->i_addr_family = node.i_addr_family; - if (sock_desc != -1) { + new_node->comm_socket = sock_desc; + new_node->node_id = node.node_id; + 
memcpy(new_node->node_ip, node.node_ip, INET6_ADDRSTRLEN); + new_node->i_addr_family = node.i_addr_family; + TRACE("DTM: dtm_node_add .node_ip: %s node_id: %x, comm_socket %d", new_node->node_ip, new_node->node_id, new_node->comm_socket); if (dtm_node_add(new_node, KeyTypes::kDtmNodeIdKeyType) != @@ -663,6 +745,7 @@ DTM_NODE_DB *dtm_process_connect(DTM_INTERNODE_CB *dtms_cb, uint8_t *data, new_node->node_ip, new_node->node_id); dtm_comm_socket_close(new_node); sock_desc = -1; + new_node = nullptr; goto node_fail; } @@ -672,10 +755,14 @@ DTM_NODE_DB *dtm_process_connect(DTM_INTERNODE_CB *dtms_cb, uint8_t *data, new_node->node_ip, new_node->node_id); dtm_comm_socket_close(new_node); sock_desc = -1; + new_node = nullptr; goto node_fail; } else TRACE("DTM: dtm_node_add add .node_ip: %s, node_id: %x", new_node->node_ip, new_node->node_id); + } else { + new_node = nullptr; + LOG_ER("comm_socket_setup_new failed for node.node_ip: %s", node.node_ip); } node_fail: -- 2.25.1 _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel