Hi Canh,

ack, review only. I think it would be good to separate the re-factoring 
part in a separate ticket though.

/BR Hans

On 12/18/18 08:25, Canh Van Truong wrote:
> During cluster start, one node (node 1) broadcast up msg to other node. The
> remote node (node 2) get this msg and send the connection to node 1 
> (connect()).
> Similarly node 1 send the connection to  node 2 after node 2 broadcast up msg 
> to.
> Beside of node 2 connect() to node 1, node 2 also add the IP and ID info of 
> node 1 to database.
> But before of that, node 2 may also accept the connection that come from node 
> 1. The
> acception is also add node ID of node 1. So there is 2 times adding the node 
> ID
> info of node 1 to database in node 2. This causes the socket connection is 
> closed
> and node is  restart again.
>
> The patch change to retrieve node from database by node IP instead node ID in
> processing connection. This will reject the double of establishing connection
> between 2 nodes and also double of adding node IP to database.
> ---
>   src/dtm/dtmnd/dtm.h               | 11 ++++--
>   src/dtm/dtmnd/dtm_inter_trans.cc  |  3 +-
>   src/dtm/dtmnd/dtm_node.cc         |  2 +-
>   src/dtm/dtmnd/dtm_node_db.cc      | 79 
> ++++++++++++++++++++++++---------------
>   src/dtm/dtmnd/dtm_node_sockets.cc | 20 ++++++----
>   5 files changed, 72 insertions(+), 43 deletions(-)
>
> diff --git a/src/dtm/dtmnd/dtm.h b/src/dtm/dtmnd/dtm.h
> index 28c811e65..a06b8f503 100644
> --- a/src/dtm/dtmnd/dtm.h
> +++ b/src/dtm/dtmnd/dtm.h
> @@ -45,6 +45,11 @@ typedef enum {
>     DTM_MBX_MSG_TYPE = 5,
>   } MBX_POST_TYPES;
>   
> +typedef enum {
> +  DTM_NODE_ID_KEY_TYPE = 0,
> +  DTM_NODE_IP_KEY_TYPE = 2,
> +} KEY_TYPES;
> +
>   typedef struct dtm_rcv_msg_elem {
>     void *next;
>     MBX_POST_TYPES type;
> @@ -99,10 +104,10 @@ typedef struct dtm_snd_msg_elem {
>   
>   extern void node_discovery_process(void *arg);
>   extern uint32_t dtm_cb_init(DTM_INTERNODE_CB *dtms_cb);
> -extern DTM_NODE_DB *dtm_node_get_by_id(uint32_t nodeid);
> +extern DTM_NODE_DB *dtm_node_get(uint8_t *key, KEY_TYPES type);
>   extern DTM_NODE_DB *dtm_node_getnext_by_id(uint32_t node_id);
> -extern uint32_t dtm_node_add(DTM_NODE_DB *node, int i);
> -extern uint32_t dtm_node_delete(DTM_NODE_DB *nnode, int i);
> +extern uint32_t dtm_node_add(DTM_NODE_DB *node, KEY_TYPES type);
> +extern uint32_t dtm_node_delete(DTM_NODE_DB *nnode, KEY_TYPES type);
>   extern DTM_NODE_DB *dtm_node_new(const DTM_NODE_DB *new_node);
>   extern void dtm_print_config(DTM_INTERNODE_CB *config);
>   extern int dtm_read_config(DTM_INTERNODE_CB *config,
> diff --git a/src/dtm/dtmnd/dtm_inter_trans.cc 
> b/src/dtm/dtmnd/dtm_inter_trans.cc
> index 9d8335466..9b4194614 100644
> --- a/src/dtm/dtmnd/dtm_inter_trans.cc
> +++ b/src/dtm/dtmnd/dtm_inter_trans.cc
> @@ -235,9 +235,10 @@ static uint32_t dtm_internode_snd_msg_common(DTM_NODE_DB 
> *node, uint8_t *buffer,
>   uint32_t dtm_internode_snd_msg_to_node(uint8_t *buffer, uint16_t len,
>                                          NODE_ID node_id) {
>     DTM_NODE_DB *node = nullptr;
> +  uint8_t *key = reinterpret_cast<uint8_t *>(&node_id);
>   
>     TRACE_ENTER();
> -  node = dtm_node_get_by_id(node_id);
> +  node = dtm_node_get(key, DTM_NODE_ID_KEY_TYPE);
>   
>     if (nullptr != node) {
>       if (NCSCC_RC_SUCCESS != dtm_internode_snd_msg_common(node, buffer, 
> len)) {
> diff --git a/src/dtm/dtmnd/dtm_node.cc b/src/dtm/dtmnd/dtm_node.cc
> index de2f94738..72506f262 100644
> --- a/src/dtm/dtmnd/dtm_node.cc
> +++ b/src/dtm/dtmnd/dtm_node.cc
> @@ -125,7 +125,7 @@ uint32_t dtm_process_node_info(DTM_INTERNODE_CB *dtms_cb, 
> DTM_NODE_DB *node,
>         memcpy(node->node_name, data, nodename_len);
>         node->node_name[nodename_len] = '\0';
>         node->comm_status = true;
> -      if (dtm_node_add(node, 0) != NCSCC_RC_SUCCESS) {
> +      if (dtm_node_add(node, DTM_NODE_ID_KEY_TYPE) != NCSCC_RC_SUCCESS) {
>           LOG_ER(
>               "DTM:  A node already exists in the cluster with similar "
>               "configuration (possible duplicate IP address and/or node id), 
> please "
> diff --git a/src/dtm/dtmnd/dtm_node_db.cc b/src/dtm/dtmnd/dtm_node_db.cc
> index 1c9da4dac..1038f0918 100644
> --- a/src/dtm/dtmnd/dtm_node_db.cc
> +++ b/src/dtm/dtmnd/dtm_node_db.cc
> @@ -123,24 +123,49 @@ uint32_t dtm_cb_init(DTM_INTERNODE_CB *dtms_cb) {
>   }
>   
>   /**
> - * Retrieve node from node db by nodeid
> + * Retrieve node from node db
>    *
> - * @param nodeid
> + * @param key
> + * @param i
>    *
> - * @return NCSCC_RC_SUCCESS
> - * @return NCSCC_RC_FAILURE
> + * @return node
>    *
>    */
> -DTM_NODE_DB *dtm_node_get_by_id(uint32_t nodeid) {
> +DTM_NODE_DB *dtm_node_get(uint8_t *key, KEY_TYPES type) {
>     TRACE_ENTER();
>     DTM_INTERNODE_CB *dtms_cb = dtms_gl_cb;
> +  DTM_NODE_DB *node = nullptr;
>   
> -  DTM_NODE_DB *node = reinterpret_cast<DTM_NODE_DB *>(ncs_patricia_tree_get(
> -      &dtms_cb->nodeid_tree, reinterpret_cast<uint8_t *>(&nodeid)));
> -  if (node != nullptr) {
> -    /* Adjust the pointer */
> -    node = reinterpret_cast<DTM_NODE_DB *>(reinterpret_cast<char *>(node) -
> -                                           offsetof(DTM_NODE_DB, 
> pat_nodeid));
> +  osafassert(key != nullptr);
> +
> +  switch (type) {
> +    case DTM_NODE_ID_KEY_TYPE:
> +      TRACE("DTM: Getting node from the database by node_id : %u as key",
> +            *reinterpret_cast<NODE_ID *>(key));
> +      node = reinterpret_cast<DTM_NODE_DB *>(ncs_patricia_tree_get(
> +          &dtms_cb->nodeid_tree, key));
> +      if (node != nullptr) {
> +        // Adjust the pointer
> +        node = reinterpret_cast<DTM_NODE_DB *>(reinterpret_cast<char 
> *>(node) -
> +            offsetof(DTM_NODE_DB, pat_nodeid));
> +      }
> +      break;
> +
> +    case DTM_NODE_IP_KEY_TYPE:
> +      TRACE("DTM: Getting node from the database by node_ip : %s as key",
> +            reinterpret_cast<char *>(key));
> +      node = reinterpret_cast<DTM_NODE_DB *>(ncs_patricia_tree_get(
> +          &dtms_cb->ip_addr_tree, key));
> +      if (node != nullptr) {
> +        // Adjust the pointer
> +        node = reinterpret_cast<DTM_NODE_DB *>(reinterpret_cast<char 
> *>(node) -
> +            offsetof(DTM_NODE_DB, pat_ip_address));
> +      }
> +      break;
> +
> +    default:
> +      osafassert(false);
> +      break;
>     }
>   
>     TRACE_LEAVE();
> @@ -189,16 +214,16 @@ DTM_NODE_DB *dtm_node_getnext_by_id(uint32_t node_id) {
>    * @return NCSCC_RC_FAILURE
>    *
>    */
> -uint32_t dtm_node_add(DTM_NODE_DB *node, int i) {
> +uint32_t dtm_node_add(DTM_NODE_DB *node, KEY_TYPES type) {
>     uint32_t rc = NCSCC_RC_SUCCESS;
>     DTM_INTERNODE_CB *dtms_cb = dtms_gl_cb;
>     TRACE_ENTER();
> -  TRACE("DTM:value of i %d", i);
> +  TRACE("DTM:value of i %d", type);
>   
>     osafassert(node != nullptr);
>   
> -  switch (i) {
> -    case 0:
> +  switch (type) {
> +    case DTM_NODE_ID_KEY_TYPE:
>         TRACE("DTM:Adding node_id to the database with node_id :%u as key",
>               node->node_id);
>         node->pat_nodeid.key_info = reinterpret_cast<uint8_t 
> *>(&(node->node_id));
> @@ -210,11 +235,8 @@ uint32_t dtm_node_add(DTM_NODE_DB *node, int i) {
>           goto done;
>         }
>         break;
> -    case 1:
> -      osafassert(false);
> -      break;
>   
> -    case 2:
> +    case DTM_NODE_IP_KEY_TYPE:
>         TRACE("DTM:Adding node_ip to the database with node_ip :%s as key",
>               node->node_ip);
>         node->pat_ip_address.key_info =
> @@ -230,8 +252,8 @@ uint32_t dtm_node_add(DTM_NODE_DB *node, int i) {
>   
>       default:
>         TRACE("DTM:Invalid Patricia add");
> -      rc = NCSCC_RC_FAILURE;
> -      goto done;
> +      osafassert(false);
> +      break;
>     }
>   
>   done:
> @@ -248,15 +270,15 @@ done:
>    * @return NCSCC_RC_FAILURE
>    *
>    */
> -uint32_t dtm_node_delete(DTM_NODE_DB *node, int i) {
> +uint32_t dtm_node_delete(DTM_NODE_DB *node, KEY_TYPES type) {
>     uint32_t rc = NCSCC_RC_SUCCESS;
>     DTM_INTERNODE_CB *dtms_cb = dtms_gl_cb;
> -  TRACE_ENTER2("DTM:value of i %d", i);
> +  TRACE_ENTER2("DTM:value of i %d", type);
>   
>     osafassert(node != nullptr);
>   
> -  switch (i) {
> -    case 0:
> +  switch (type) {
> +    case DTM_NODE_ID_KEY_TYPE:
>         if (node->node_id != 0 && node->pat_nodeid.key_info) {
>           TRACE("DTM:Deleting node_id from the database with node_id :%u as 
> key",
>                 node->node_id);
> @@ -269,11 +291,8 @@ uint32_t dtm_node_delete(DTM_NODE_DB *node, int i) {
>           }
>         }
>         break;
> -    case 1:
> -      osafassert(false);
> -      break;
>   
> -    case 2:
> +    case DTM_NODE_IP_KEY_TYPE:
>         if (node->node_ip != nullptr && node->pat_ip_address.key_info) {
>           TRACE("DTM:Deleting node_ip from the  database with node_ip :%s as 
> key",
>                 node->node_ip);
> @@ -290,7 +309,7 @@ uint32_t dtm_node_delete(DTM_NODE_DB *node, int i) {
>       default:
>         TRACE(
>             "DTM:Deleting node from the database  with unknown option :%d as 
> key",
> -          i);
> +          type);
>         osafassert(0);
>     }
>   
> diff --git a/src/dtm/dtmnd/dtm_node_sockets.cc 
> b/src/dtm/dtmnd/dtm_node_sockets.cc
> index 0ddfc6f58..a0b835c29 100644
> --- a/src/dtm/dtmnd/dtm_node_sockets.cc
> +++ b/src/dtm/dtmnd/dtm_node_sockets.cc
> @@ -171,11 +171,11 @@ void dtm_comm_socket_close(DTM_NODE_DB *node) {
>         }
>       }
>   
> -    if (dtm_node_delete(node, 0) != NCSCC_RC_SUCCESS) {
> +    if (dtm_node_delete(node, DTM_NODE_ID_KEY_TYPE) != NCSCC_RC_SUCCESS) {
>         LOG_ER("DTM :dtm_node_delete failed ");
>       }
>   
> -    if (dtm_node_delete(node, 2) != NCSCC_RC_SUCCESS) {
> +    if (dtm_node_delete(node, DTM_NODE_IP_KEY_TYPE) != NCSCC_RC_SUCCESS) {
>         LOG_ER("DTM :dtm_node_delete failed ");
>       }
>   
> @@ -566,7 +566,11 @@ DTM_NODE_DB *dtm_process_connect(DTM_INTERNODE_CB 
> *dtms_cb, uint8_t *data,
>       }
>     }
>   
> -  new_node = dtm_node_get_by_id(node.node_id);
> +  // Retrieve node from nodeip instead of nodeid to prevent the case dtm 
> already
> +  // accepted remote node connection and dtm already add the nodeip to 
> database.
> +  // If so, dtm should drop this message as discovery in progress.
> +  uint8_t *nodeip = reinterpret_cast<uint8_t *>(node.node_ip);
> +  new_node = dtm_node_get(nodeip, DTM_NODE_IP_KEY_TYPE);
>     if (new_node != nullptr) {
>       if ((new_node->node_id == 0) || (new_node->node_id == node.node_id) ||
>           (strncmp(node.node_ip, new_node->node_ip, INET6_ADDRSTRLEN) == 0)) {
> @@ -593,10 +597,10 @@ DTM_NODE_DB *dtm_process_connect(DTM_INTERNODE_CB 
> *dtms_cb, uint8_t *data,
>                    0))) {
>         TRACE("DTM: deleting stale enty cluster_id: %d, node_id :%u, 
> node_ip:%s",
>               node.cluster_id, node.node_id, node.node_ip);
> -      if (dtm_node_delete(new_node, 0) != NCSCC_RC_SUCCESS) {
> +      if (dtm_node_delete(new_node, DTM_NODE_ID_KEY_TYPE) != 
> NCSCC_RC_SUCCESS) {
>           LOG_ER("DTM :dtm_node_delete failed (recv())");
>         }
> -      if (dtm_node_delete(new_node, 2) != NCSCC_RC_SUCCESS) {
> +      if (dtm_node_delete(new_node, DTM_NODE_IP_KEY_TYPE) != 
> NCSCC_RC_SUCCESS) {
>           LOG_ER("DTM :dtm_node_delete failed (recv())");
>         }
>         free(new_node);
> @@ -621,7 +625,7 @@ DTM_NODE_DB *dtm_process_connect(DTM_INTERNODE_CB 
> *dtms_cb, uint8_t *data,
>     if (sock_desc != -1) {
>       TRACE("DTM: dtm_node_add .node_ip: %s node_id: %u, comm_socket %d",
>             new_node->node_ip, new_node->node_id, new_node->comm_socket);
> -    if (dtm_node_add(new_node, 0) != NCSCC_RC_SUCCESS) {
> +    if (dtm_node_add(new_node, DTM_NODE_ID_KEY_TYPE) != NCSCC_RC_SUCCESS) {
>         LOG_ER("DTM: dtm_node_add failed .node_ip: %s, node_id: %u",
>                new_node->node_ip, new_node->node_id);
>         dtm_comm_socket_close(new_node);
> @@ -629,7 +633,7 @@ DTM_NODE_DB *dtm_process_connect(DTM_INTERNODE_CB 
> *dtms_cb, uint8_t *data,
>         goto node_fail;
>       }
>   
> -    if (dtm_node_add(new_node, 2) != NCSCC_RC_SUCCESS) {
> +    if (dtm_node_add(new_node, DTM_NODE_IP_KEY_TYPE) != NCSCC_RC_SUCCESS) {
>         LOG_ER("DTM: dtm_node_add failed .node_ip: %s, node_id: %u",
>                new_node->node_ip, new_node->node_id);
>         dtm_comm_socket_close(new_node);
> @@ -748,7 +752,7 @@ DTM_NODE_DB *dtm_process_accept(DTM_INTERNODE_CB 
> *dtms_cb, int stream_sock) {
>         continue;
>       }
>   
> -    if (dtm_node_add(new_node, 2) != NCSCC_RC_SUCCESS) {
> +    if (dtm_node_add(new_node, DTM_NODE_IP_KEY_TYPE) != NCSCC_RC_SUCCESS) {
>         LOG_ER("DTM: dtm_node_add failed .node_ip: %s, node_id: %u",
>                new_node->node_ip, new_node->node_id);
>         dtm_comm_socket_close(new_node);

_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to