Hi Gary and Hans,

Thanks for your review.
I updated Gary comment and running patch test again.

@Hans. Because there is just small changes that does not related to the ticket. 
(the changes in dtm_node_delete() and dtm_node_add() with replace number by 
enum) so I don’t separate the patch.

I will send the new after patch test ran.

Regards
Canh
-----Original Message-----
From: Gary Lee <[email protected]> 
Sent: Thursday, March 7, 2019 6:58 AM
To: Hans Nordebäck <[email protected]>; Canh Van Truong 
<[email protected]>; Anders Widell <[email protected]>; 
Minh Hon Chau <[email protected]>
Cc: [email protected]
Subject: Re: [PATCH 1/1] dtm: Fix dtm close socket due to duplication of adding 
node IP info [#2984]

Hi Canh

One minor comment, KEY_TYPES should probably be called KeyTypes. Also, 
can you make it an enum class, rather than plain enum?

Thanks

Gary

On 7/3/19 12:53 am, Hans Nordebäck wrote:
> Hi Canh,
>
> ack, review only. I think it would be good to separate the re-factoring
> part in a separate ticket though.
>
> /BR Hans
>
> On 12/18/18 08:25, Canh Van Truong wrote:
>> During cluster start, one node (node 1) broadcast up msg to other node. The
>> remote node (node 2) get this msg and send the connection to node 1 
>> (connect()).
>> Similarly node 1 send the connection to  node 2 after node 2 broadcast up 
>> msg to.
>> Beside of node 2 connect() to node 1, node 2 also add the IP and ID info of 
>> node 1 to database.
>> But before of that, node 2 may also accept the connection that come from 
>> node 1. The
>> acception is also add node ID of node 1. So there is 2 times adding the node 
>> ID
>> info of node 1 to database in node 2. This causes the socket connection is 
>> closed
>> and node is  restart again.
>>
>> The patch change to retrieve node from database by node IP instead node ID in
>> processing connection. This will reject the double of establishing connection
>> between 2 nodes and also double of adding node IP to database.
>> ---
>>    src/dtm/dtmnd/dtm.h               | 11 ++++--
>>    src/dtm/dtmnd/dtm_inter_trans.cc  |  3 +-
>>    src/dtm/dtmnd/dtm_node.cc         |  2 +-
>>    src/dtm/dtmnd/dtm_node_db.cc      | 79 
>> ++++++++++++++++++++++++---------------
>>    src/dtm/dtmnd/dtm_node_sockets.cc | 20 ++++++----
>>    5 files changed, 72 insertions(+), 43 deletions(-)
>>
>> diff --git a/src/dtm/dtmnd/dtm.h b/src/dtm/dtmnd/dtm.h
>> index 28c811e65..a06b8f503 100644
>> --- a/src/dtm/dtmnd/dtm.h
>> +++ b/src/dtm/dtmnd/dtm.h
>> @@ -45,6 +45,11 @@ typedef enum {
>>      DTM_MBX_MSG_TYPE = 5,
>>    } MBX_POST_TYPES;
>>    
>> +typedef enum {
>> +  DTM_NODE_ID_KEY_TYPE = 0,
>> +  DTM_NODE_IP_KEY_TYPE = 2,
>> +} KEY_TYPES;
>> +
>>    typedef struct dtm_rcv_msg_elem {
>>      void *next;
>>      MBX_POST_TYPES type;
>> @@ -99,10 +104,10 @@ typedef struct dtm_snd_msg_elem {
>>    
>>    extern void node_discovery_process(void *arg);
>>    extern uint32_t dtm_cb_init(DTM_INTERNODE_CB *dtms_cb);
>> -extern DTM_NODE_DB *dtm_node_get_by_id(uint32_t nodeid);
>> +extern DTM_NODE_DB *dtm_node_get(uint8_t *key, KEY_TYPES type);
>>    extern DTM_NODE_DB *dtm_node_getnext_by_id(uint32_t node_id);
>> -extern uint32_t dtm_node_add(DTM_NODE_DB *node, int i);
>> -extern uint32_t dtm_node_delete(DTM_NODE_DB *nnode, int i);
>> +extern uint32_t dtm_node_add(DTM_NODE_DB *node, KEY_TYPES type);
>> +extern uint32_t dtm_node_delete(DTM_NODE_DB *nnode, KEY_TYPES type);
>>    extern DTM_NODE_DB *dtm_node_new(const DTM_NODE_DB *new_node);
>>    extern void dtm_print_config(DTM_INTERNODE_CB *config);
>>    extern int dtm_read_config(DTM_INTERNODE_CB *config,
>> diff --git a/src/dtm/dtmnd/dtm_inter_trans.cc 
>> b/src/dtm/dtmnd/dtm_inter_trans.cc
>> index 9d8335466..9b4194614 100644
>> --- a/src/dtm/dtmnd/dtm_inter_trans.cc
>> +++ b/src/dtm/dtmnd/dtm_inter_trans.cc
>> @@ -235,9 +235,10 @@ static uint32_t 
>> dtm_internode_snd_msg_common(DTM_NODE_DB *node, uint8_t *buffer,
>>    uint32_t dtm_internode_snd_msg_to_node(uint8_t *buffer, uint16_t len,
>>                                           NODE_ID node_id) {
>>      DTM_NODE_DB *node = nullptr;
>> +  uint8_t *key = reinterpret_cast<uint8_t *>(&node_id);
>>    
>>      TRACE_ENTER();
>> -  node = dtm_node_get_by_id(node_id);
>> +  node = dtm_node_get(key, DTM_NODE_ID_KEY_TYPE);
>>    
>>      if (nullptr != node) {
>>        if (NCSCC_RC_SUCCESS != dtm_internode_snd_msg_common(node, buffer, 
>> len)) {
>> diff --git a/src/dtm/dtmnd/dtm_node.cc b/src/dtm/dtmnd/dtm_node.cc
>> index de2f94738..72506f262 100644
>> --- a/src/dtm/dtmnd/dtm_node.cc
>> +++ b/src/dtm/dtmnd/dtm_node.cc
>> @@ -125,7 +125,7 @@ uint32_t dtm_process_node_info(DTM_INTERNODE_CB 
>> *dtms_cb, DTM_NODE_DB *node,
>>          memcpy(node->node_name, data, nodename_len);
>>          node->node_name[nodename_len] = '\0';
>>          node->comm_status = true;
>> -      if (dtm_node_add(node, 0) != NCSCC_RC_SUCCESS) {
>> +      if (dtm_node_add(node, DTM_NODE_ID_KEY_TYPE) != NCSCC_RC_SUCCESS) {
>>            LOG_ER(
>>                "DTM:  A node already exists in the cluster with similar "
>>                "configuration (possible duplicate IP address and/or node 
>> id), please "
>> diff --git a/src/dtm/dtmnd/dtm_node_db.cc b/src/dtm/dtmnd/dtm_node_db.cc
>> index 1c9da4dac..1038f0918 100644
>> --- a/src/dtm/dtmnd/dtm_node_db.cc
>> +++ b/src/dtm/dtmnd/dtm_node_db.cc
>> @@ -123,24 +123,49 @@ uint32_t dtm_cb_init(DTM_INTERNODE_CB *dtms_cb) {
>>    }
>>    
>>    /**
>> - * Retrieve node from node db by nodeid
>> + * Retrieve node from node db
>>     *
>> - * @param nodeid
>> + * @param key
>> + * @param i
>>     *
>> - * @return NCSCC_RC_SUCCESS
>> - * @return NCSCC_RC_FAILURE
>> + * @return node
>>     *
>>     */
>> -DTM_NODE_DB *dtm_node_get_by_id(uint32_t nodeid) {
>> +DTM_NODE_DB *dtm_node_get(uint8_t *key, KEY_TYPES type) {
>>      TRACE_ENTER();
>>      DTM_INTERNODE_CB *dtms_cb = dtms_gl_cb;
>> +  DTM_NODE_DB *node = nullptr;
>>    
>> -  DTM_NODE_DB *node = reinterpret_cast<DTM_NODE_DB *>(ncs_patricia_tree_get(
>> -      &dtms_cb->nodeid_tree, reinterpret_cast<uint8_t *>(&nodeid)));
>> -  if (node != nullptr) {
>> -    /* Adjust the pointer */
>> -    node = reinterpret_cast<DTM_NODE_DB *>(reinterpret_cast<char *>(node) -
>> -                                           offsetof(DTM_NODE_DB, 
>> pat_nodeid));
>> +  osafassert(key != nullptr);
>> +
>> +  switch (type) {
>> +    case DTM_NODE_ID_KEY_TYPE:
>> +      TRACE("DTM: Getting node from the database by node_id : %u as key",
>> +            *reinterpret_cast<NODE_ID *>(key));
>> +      node = reinterpret_cast<DTM_NODE_DB *>(ncs_patricia_tree_get(
>> +          &dtms_cb->nodeid_tree, key));
>> +      if (node != nullptr) {
>> +        // Adjust the pointer
>> +        node = reinterpret_cast<DTM_NODE_DB *>(reinterpret_cast<char 
>> *>(node) -
>> +            offsetof(DTM_NODE_DB, pat_nodeid));
>> +      }
>> +      break;
>> +
>> +    case DTM_NODE_IP_KEY_TYPE:
>> +      TRACE("DTM: Getting node from the database by node_ip : %s as key",
>> +            reinterpret_cast<char *>(key));
>> +      node = reinterpret_cast<DTM_NODE_DB *>(ncs_patricia_tree_get(
>> +          &dtms_cb->ip_addr_tree, key));
>> +      if (node != nullptr) {
>> +        // Adjust the pointer
>> +        node = reinterpret_cast<DTM_NODE_DB *>(reinterpret_cast<char 
>> *>(node) -
>> +            offsetof(DTM_NODE_DB, pat_ip_address));
>> +      }
>> +      break;
>> +
>> +    default:
>> +      osafassert(false);
>> +      break;
>>      }
>>    
>>      TRACE_LEAVE();
>> @@ -189,16 +214,16 @@ DTM_NODE_DB *dtm_node_getnext_by_id(uint32_t node_id) {
>>     * @return NCSCC_RC_FAILURE
>>     *
>>     */
>> -uint32_t dtm_node_add(DTM_NODE_DB *node, int i) {
>> +uint32_t dtm_node_add(DTM_NODE_DB *node, KEY_TYPES type) {
>>      uint32_t rc = NCSCC_RC_SUCCESS;
>>      DTM_INTERNODE_CB *dtms_cb = dtms_gl_cb;
>>      TRACE_ENTER();
>> -  TRACE("DTM:value of i %d", i);
>> +  TRACE("DTM:value of i %d", type);
>>    
>>      osafassert(node != nullptr);
>>    
>> -  switch (i) {
>> -    case 0:
>> +  switch (type) {
>> +    case DTM_NODE_ID_KEY_TYPE:
>>          TRACE("DTM:Adding node_id to the database with node_id :%u as key",
>>                node->node_id);
>>          node->pat_nodeid.key_info = reinterpret_cast<uint8_t 
>> *>(&(node->node_id));
>> @@ -210,11 +235,8 @@ uint32_t dtm_node_add(DTM_NODE_DB *node, int i) {
>>            goto done;
>>          }
>>          break;
>> -    case 1:
>> -      osafassert(false);
>> -      break;
>>    
>> -    case 2:
>> +    case DTM_NODE_IP_KEY_TYPE:
>>          TRACE("DTM:Adding node_ip to the database with node_ip :%s as key",
>>                node->node_ip);
>>          node->pat_ip_address.key_info =
>> @@ -230,8 +252,8 @@ uint32_t dtm_node_add(DTM_NODE_DB *node, int i) {
>>    
>>        default:
>>          TRACE("DTM:Invalid Patricia add");
>> -      rc = NCSCC_RC_FAILURE;
>> -      goto done;
>> +      osafassert(false);
>> +      break;
>>      }
>>    
>>    done:
>> @@ -248,15 +270,15 @@ done:
>>     * @return NCSCC_RC_FAILURE
>>     *
>>     */
>> -uint32_t dtm_node_delete(DTM_NODE_DB *node, int i) {
>> +uint32_t dtm_node_delete(DTM_NODE_DB *node, KEY_TYPES type) {
>>      uint32_t rc = NCSCC_RC_SUCCESS;
>>      DTM_INTERNODE_CB *dtms_cb = dtms_gl_cb;
>> -  TRACE_ENTER2("DTM:value of i %d", i);
>> +  TRACE_ENTER2("DTM:value of i %d", type);
>>    
>>      osafassert(node != nullptr);
>>    
>> -  switch (i) {
>> -    case 0:
>> +  switch (type) {
>> +    case DTM_NODE_ID_KEY_TYPE:
>>          if (node->node_id != 0 && node->pat_nodeid.key_info) {
>>            TRACE("DTM:Deleting node_id from the database with node_id :%u as 
>> key",
>>                  node->node_id);
>> @@ -269,11 +291,8 @@ uint32_t dtm_node_delete(DTM_NODE_DB *node, int i) {
>>            }
>>          }
>>          break;
>> -    case 1:
>> -      osafassert(false);
>> -      break;
>>    
>> -    case 2:
>> +    case DTM_NODE_IP_KEY_TYPE:
>>          if (node->node_ip != nullptr && node->pat_ip_address.key_info) {
>>            TRACE("DTM:Deleting node_ip from the  database with node_ip :%s 
>> as key",
>>                  node->node_ip);
>> @@ -290,7 +309,7 @@ uint32_t dtm_node_delete(DTM_NODE_DB *node, int i) {
>>        default:
>>          TRACE(
>>              "DTM:Deleting node from the database  with unknown option :%d 
>> as key",
>> -          i);
>> +          type);
>>          osafassert(0);
>>      }
>>    
>> diff --git a/src/dtm/dtmnd/dtm_node_sockets.cc 
>> b/src/dtm/dtmnd/dtm_node_sockets.cc
>> index 0ddfc6f58..a0b835c29 100644
>> --- a/src/dtm/dtmnd/dtm_node_sockets.cc
>> +++ b/src/dtm/dtmnd/dtm_node_sockets.cc
>> @@ -171,11 +171,11 @@ void dtm_comm_socket_close(DTM_NODE_DB *node) {
>>          }
>>        }
>>    
>> -    if (dtm_node_delete(node, 0) != NCSCC_RC_SUCCESS) {
>> +    if (dtm_node_delete(node, DTM_NODE_ID_KEY_TYPE) != NCSCC_RC_SUCCESS) {
>>          LOG_ER("DTM :dtm_node_delete failed ");
>>        }
>>    
>> -    if (dtm_node_delete(node, 2) != NCSCC_RC_SUCCESS) {
>> +    if (dtm_node_delete(node, DTM_NODE_IP_KEY_TYPE) != NCSCC_RC_SUCCESS) {
>>          LOG_ER("DTM :dtm_node_delete failed ");
>>        }
>>    
>> @@ -566,7 +566,11 @@ DTM_NODE_DB *dtm_process_connect(DTM_INTERNODE_CB 
>> *dtms_cb, uint8_t *data,
>>        }
>>      }
>>    
>> -  new_node = dtm_node_get_by_id(node.node_id);
>> +  // Retrieve node from nodeip instead of nodeid to prevent the case dtm 
>> already
>> +  // accepted remote node connection and dtm already add the nodeip to 
>> database.
>> +  // If so, dtm should drop this message as discovery in progress.
>> +  uint8_t *nodeip = reinterpret_cast<uint8_t *>(node.node_ip);
>> +  new_node = dtm_node_get(nodeip, DTM_NODE_IP_KEY_TYPE);
>>      if (new_node != nullptr) {
>>        if ((new_node->node_id == 0) || (new_node->node_id == node.node_id) ||
>>            (strncmp(node.node_ip, new_node->node_ip, INET6_ADDRSTRLEN) == 
>> 0)) {
>> @@ -593,10 +597,10 @@ DTM_NODE_DB *dtm_process_connect(DTM_INTERNODE_CB 
>> *dtms_cb, uint8_t *data,
>>                     0))) {
>>          TRACE("DTM: deleting stale enty cluster_id: %d, node_id :%u, 
>> node_ip:%s",
>>                node.cluster_id, node.node_id, node.node_ip);
>> -      if (dtm_node_delete(new_node, 0) != NCSCC_RC_SUCCESS) {
>> +      if (dtm_node_delete(new_node, DTM_NODE_ID_KEY_TYPE) != 
>> NCSCC_RC_SUCCESS) {
>>            LOG_ER("DTM :dtm_node_delete failed (recv())");
>>          }
>> -      if (dtm_node_delete(new_node, 2) != NCSCC_RC_SUCCESS) {
>> +      if (dtm_node_delete(new_node, DTM_NODE_IP_KEY_TYPE) != 
>> NCSCC_RC_SUCCESS) {
>>            LOG_ER("DTM :dtm_node_delete failed (recv())");
>>          }
>>          free(new_node);
>> @@ -621,7 +625,7 @@ DTM_NODE_DB *dtm_process_connect(DTM_INTERNODE_CB 
>> *dtms_cb, uint8_t *data,
>>      if (sock_desc != -1) {
>>        TRACE("DTM: dtm_node_add .node_ip: %s node_id: %u, comm_socket %d",
>>              new_node->node_ip, new_node->node_id, new_node->comm_socket);
>> -    if (dtm_node_add(new_node, 0) != NCSCC_RC_SUCCESS) {
>> +    if (dtm_node_add(new_node, DTM_NODE_ID_KEY_TYPE) != NCSCC_RC_SUCCESS) {
>>          LOG_ER("DTM: dtm_node_add failed .node_ip: %s, node_id: %u",
>>                 new_node->node_ip, new_node->node_id);
>>          dtm_comm_socket_close(new_node);
>> @@ -629,7 +633,7 @@ DTM_NODE_DB *dtm_process_connect(DTM_INTERNODE_CB 
>> *dtms_cb, uint8_t *data,
>>          goto node_fail;
>>        }
>>    
>> -    if (dtm_node_add(new_node, 2) != NCSCC_RC_SUCCESS) {
>> +    if (dtm_node_add(new_node, DTM_NODE_IP_KEY_TYPE) != NCSCC_RC_SUCCESS) {
>>          LOG_ER("DTM: dtm_node_add failed .node_ip: %s, node_id: %u",
>>                 new_node->node_ip, new_node->node_id);
>>          dtm_comm_socket_close(new_node);
>> @@ -748,7 +752,7 @@ DTM_NODE_DB *dtm_process_accept(DTM_INTERNODE_CB 
>> *dtms_cb, int stream_sock) {
>>          continue;
>>        }
>>    
>> -    if (dtm_node_add(new_node, 2) != NCSCC_RC_SUCCESS) {
>> +    if (dtm_node_add(new_node, DTM_NODE_IP_KEY_TYPE) != NCSCC_RC_SUCCESS) {
>>          LOG_ER("DTM: dtm_node_add failed .node_ip: %s, node_id: %u",
>>                 new_node->node_ip, new_node->node_id);
>>          dtm_comm_socket_close(new_node);



_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to