Re: [devel] [PATCH 1/1] clmd: not send sync respond to client if node down [#3004]

2019-02-12 Thread Thang Nguyen
Hi Minh,

There is NO node info in data base of CLM in both case node down and node 
unconfigured.
So we can not use it to distingush b/w them.

B.R
/Thang

-Original Message-
From: Minh Hon Chau  
Sent: Wednesday, February 13, 2019 9:49 AM
To: Tran Thuan ; 'thang.d.nguyen' 
; gary@dektech.com.au
Cc: opensaf-devel@lists.sourceforge.net
Subject: Re: [PATCH 1/1] clmd: not send sync respond to client if node down 
[#3004]

Hi Thang,

The patch looks ok, but I'm thinking of not introducing mds_node_down_list.

In SAI-AIS-CLM-B.04.01:

"The term unconfigured node is used in this document to designate an execution 
environment that is not configured to host a CLM node."

May we add a check if a node is unconfigured because it's not in ee_lookup, to 
distinguish with if a node is down?

Thanks

Minh

On 1/2/19 2:34 pm, Tran Thuan wrote:
> Hi Thang,
>
> ACK from me for code review, not tested.
>
> Best Regards,
> ThuanTr
>
> -Original Message-
> From: thang.d.nguyen 
> Sent: Wednesday, January 30, 2019 1:20 AM
> To: gary@dektech.com.au; minh.c...@dektech.com.au; 
> thuan.t...@dektech.com.au
> Cc: opensaf-devel@lists.sourceforge.net; thang.d.nguyen 
> 
> Subject: [PATCH 1/1] clmd: not send sync respond to client if node 
> down [#3004]
>
> Clmd will not send sync respond to client if the node that client 
> resided on down. This will avoid timeout when clmd send via mds.
> ---
>   src/clm/clmd/clms_cb.h   |  3 +++
>   src/clm/clmd/clms_evt.cc | 22 +- 
> src/clm/clmd/clms_mds.cc |  2 +-
>   3 files changed, 21 insertions(+), 6 deletions(-)
>
> diff --git a/src/clm/clmd/clms_cb.h b/src/clm/clmd/clms_cb.h index 
> 4d7fdc7..637d53a 100644
> --- a/src/clm/clmd/clms_cb.h
> +++ b/src/clm/clmd/clms_cb.h
> @@ -22,6 +22,7 @@
>   #include "osaf/config.h"
>   #endif
>   #include 
> +#include 
>   #include 
>   #include 
>   #include 
> @@ -238,6 +239,8 @@ typedef struct clms_cb_t {
> *node_down_list_head; /*NODE_DOWN record - Fix when active 
> node goes down
>*/
> NODE_DOWN_LIST *node_down_list_tail;
> +  // Record node id when receive MDS node down  std::set 
> + mds_node_down_list;
> bool is_impl_set;
> bool nid_started; /**< true if started by NID */
> NCS_PATRICIA_TREE iplist; /* To temporarily store ipaddress 
> information diff --git a/src/clm/clmd/clms_evt.cc 
> b/src/clm/clmd/clms_evt.cc index
> c2b83c2..5265002 100644
> --- a/src/clm/clmd/clms_evt.cc
> +++ b/src/clm/clmd/clms_evt.cc
> @@ -943,6 +943,8 @@ static uint32_t proc_mds_node_evt(CLMSV_CLMS_EVT *evt) {
>   goto done;
> }
>   
> +  clms_cb->mds_node_down_list.insert(node_id);
> +
> if ((clms_cb->ha_state == SA_AMF_HA_ACTIVE) ||
> (clms_cb->ha_state == SA_AMF_HA_QUIESCED)) {
>   clms_track_send_node_down(node); @@ -1531,19 +1533,24 @@ static 
> uint32_t proc_initialize_msg(CLMS_CB *cb, CLMSV_CLMS_EVT *evt) {
>   
> TRACE_ENTER2("dest %" PRIx64, evt->fr_dest);
>   
> -  /*Handle the wrap around */
> -  if (clms_cb->last_client_id == INT_MAX) clms_cb->last_client_id = 
> 0;
> -
> -  clms_cb->last_client_id++;
> -
> node = clms_node_get_by_id(node_id);
> TRACE("Node id = %d", node_id);
> if (node == nullptr) {
>   LOG_IN("Initialize request of client on an unconfigured node: 
> node_id = %d",
>  node_id);
>   ais_rc = SA_AIS_ERR_UNAVAILABLE;
> +std::set::iterator it =
> +  clms_cb->mds_node_down_list.find(node_id);
> +if (it != clms_cb->mds_node_down_list.end()) {
> +  return (uint32_t)ais_rc;
> +}
> }
>   
> +  /*Handle the wrap around */
> +  if (clms_cb->last_client_id == INT_MAX) clms_cb->last_client_id = 
> + 0;
> +
> +  clms_cb->last_client_id++;
> +
> if ((client = clms_client_new(evt->fr_dest, clms_cb->last_client_id)) ==
> nullptr) {
>   TRACE("Creating a new client failed"); @@ -1564,6 +1571,11 @@ 
> static uint32_t proc_initialize_msg(CLMS_CB *cb, CLMSV_CLMS_EVT *evt) {
>   return rc;
> }
>   
> +  std::set::iterator it =
> + clms_cb->mds_node_down_list.find(node_id);
> +  if (it != clms_cb->mds_node_down_list.end()) {
> +clms_cb->mds_node_down_list.erase(it);
> +  }
> +
> if (node) {
>   if (node->member == false) {
> rc = clms_send_is_member_info(clms_cb, node->node_id, 
> node->member, diff --git a/src/clm/clmd/clms_mds.cc 
> b/src/clm/clmd/clms_mds.cc index 58552cc..833d18c 100644
> --- a/src/clm/clmd/clms_mds.cc
> +++ b/src/clm/clmd/clms_mds.cc
> @@ -1097,7 +1097,7 @@ static uint32_t clms_mds_node_event(struct 
> ncsmds_callback_info *mds_info) {
>   clmsv_evt->info.node_mds_info.node_id = 
> mds_info->info.node_evt.node_id;
>   clmsv_evt->info.node_mds_info.nodeup = SA_TRUE;
>   
> -rc = m_NCS_IPC_SEND(_cb->mbx, clmsv_evt, NCS_IPC_PRIORITY_HIGH);
> +rc = m_NCS_IPC_SEND(_cb->mbx, clmsv_evt, 
> + NCS_IPC_PRIORITY_VERY_HIGH);
>   if (rc != NCSCC_RC_SUCCESS) {
> TRACE("IPC send failed 

Re: [devel] [PATCH 1/1] clmd: not send sync respond to client if node down [#3004]

2019-02-12 Thread Minh Hon Chau

Hi Thang,

The patch looks ok, but I'm thinking of not introducing mds_node_down_list.

In SAI-AIS-CLM-B.04.01:

"The term unconfigured node is used in this document to designate an 
execution environment that is not configured to host a CLM node."


May we add a check if a node is unconfigured because it's not in 
ee_lookup, to distinguish with if a node is down?


Thanks

Minh

On 1/2/19 2:34 pm, Tran Thuan wrote:

Hi Thang,

ACK from me for code review, not tested.

Best Regards,
ThuanTr

-Original Message-
From: thang.d.nguyen 
Sent: Wednesday, January 30, 2019 1:20 AM
To: gary@dektech.com.au; minh.c...@dektech.com.au;
thuan.t...@dektech.com.au
Cc: opensaf-devel@lists.sourceforge.net; thang.d.nguyen

Subject: [PATCH 1/1] clmd: not send sync respond to client if node down
[#3004]

Clmd will not send sync respond to client if the node that client resided on
down. This will avoid timeout when clmd send via mds.
---
  src/clm/clmd/clms_cb.h   |  3 +++
  src/clm/clmd/clms_evt.cc | 22 +-
src/clm/clmd/clms_mds.cc |  2 +-
  3 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/src/clm/clmd/clms_cb.h b/src/clm/clmd/clms_cb.h index
4d7fdc7..637d53a 100644
--- a/src/clm/clmd/clms_cb.h
+++ b/src/clm/clmd/clms_cb.h
@@ -22,6 +22,7 @@
  #include "osaf/config.h"
  #endif
  #include 
+#include 
  #include 
  #include 
  #include 
@@ -238,6 +239,8 @@ typedef struct clms_cb_t {
*node_down_list_head; /*NODE_DOWN record - Fix when active node goes
down
   */
NODE_DOWN_LIST *node_down_list_tail;
+  // Record node id when receive MDS node down  std::set
+ mds_node_down_list;
bool is_impl_set;
bool nid_started; /**< true if started by NID */
NCS_PATRICIA_TREE iplist; /* To temporarily store ipaddress information
diff --git a/src/clm/clmd/clms_evt.cc b/src/clm/clmd/clms_evt.cc index
c2b83c2..5265002 100644
--- a/src/clm/clmd/clms_evt.cc
+++ b/src/clm/clmd/clms_evt.cc
@@ -943,6 +943,8 @@ static uint32_t proc_mds_node_evt(CLMSV_CLMS_EVT *evt) {
  goto done;
}
  
+  clms_cb->mds_node_down_list.insert(node_id);

+
if ((clms_cb->ha_state == SA_AMF_HA_ACTIVE) ||
(clms_cb->ha_state == SA_AMF_HA_QUIESCED)) {
  clms_track_send_node_down(node);
@@ -1531,19 +1533,24 @@ static uint32_t proc_initialize_msg(CLMS_CB *cb,
CLMSV_CLMS_EVT *evt) {
  
TRACE_ENTER2("dest %" PRIx64, evt->fr_dest);
  
-  /*Handle the wrap around */

-  if (clms_cb->last_client_id == INT_MAX) clms_cb->last_client_id = 0;
-
-  clms_cb->last_client_id++;
-
node = clms_node_get_by_id(node_id);
TRACE("Node id = %d", node_id);
if (node == nullptr) {
  LOG_IN("Initialize request of client on an unconfigured node: node_id =
%d",
 node_id);
  ais_rc = SA_AIS_ERR_UNAVAILABLE;
+std::set::iterator it =
+  clms_cb->mds_node_down_list.find(node_id);
+if (it != clms_cb->mds_node_down_list.end()) {
+  return (uint32_t)ais_rc;
+}
}
  
+  /*Handle the wrap around */

+  if (clms_cb->last_client_id == INT_MAX) clms_cb->last_client_id = 0;
+
+  clms_cb->last_client_id++;
+
if ((client = clms_client_new(evt->fr_dest, clms_cb->last_client_id)) ==
nullptr) {
  TRACE("Creating a new client failed"); @@ -1564,6 +1571,11 @@ static
uint32_t proc_initialize_msg(CLMS_CB *cb, CLMSV_CLMS_EVT *evt) {
  return rc;
}
  
+  std::set::iterator it =

+ clms_cb->mds_node_down_list.find(node_id);
+  if (it != clms_cb->mds_node_down_list.end()) {
+clms_cb->mds_node_down_list.erase(it);
+  }
+
if (node) {
  if (node->member == false) {
rc = clms_send_is_member_info(clms_cb, node->node_id, node->member,
diff --git a/src/clm/clmd/clms_mds.cc b/src/clm/clmd/clms_mds.cc index
58552cc..833d18c 100644
--- a/src/clm/clmd/clms_mds.cc
+++ b/src/clm/clmd/clms_mds.cc
@@ -1097,7 +1097,7 @@ static uint32_t clms_mds_node_event(struct
ncsmds_callback_info *mds_info) {
  clmsv_evt->info.node_mds_info.node_id =
mds_info->info.node_evt.node_id;
  clmsv_evt->info.node_mds_info.nodeup = SA_TRUE;
  
-rc = m_NCS_IPC_SEND(_cb->mbx, clmsv_evt, NCS_IPC_PRIORITY_HIGH);

+rc = m_NCS_IPC_SEND(_cb->mbx, clmsv_evt,
+ NCS_IPC_PRIORITY_VERY_HIGH);
  if (rc != NCSCC_RC_SUCCESS) {
TRACE("IPC send failed %d", rc);
free(clmsv_evt);
--
2.7.4






___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel


Re: [devel] [PATCH 1/1] clmd: not send sync respond to client if node down [#3004]

2019-01-31 Thread Tran Thuan
Hi Thang,

ACK from me for code review, not tested.

Best Regards,
ThuanTr

-Original Message-
From: thang.d.nguyen  
Sent: Wednesday, January 30, 2019 1:20 AM
To: gary@dektech.com.au; minh.c...@dektech.com.au;
thuan.t...@dektech.com.au
Cc: opensaf-devel@lists.sourceforge.net; thang.d.nguyen

Subject: [PATCH 1/1] clmd: not send sync respond to client if node down
[#3004]

Clmd will not send sync respond to client if the node that client resided on
down. This will avoid timeout when clmd send via mds.
---
 src/clm/clmd/clms_cb.h   |  3 +++
 src/clm/clmd/clms_evt.cc | 22 +-
src/clm/clmd/clms_mds.cc |  2 +-
 3 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/src/clm/clmd/clms_cb.h b/src/clm/clmd/clms_cb.h index
4d7fdc7..637d53a 100644
--- a/src/clm/clmd/clms_cb.h
+++ b/src/clm/clmd/clms_cb.h
@@ -22,6 +22,7 @@
 #include "osaf/config.h"
 #endif
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -238,6 +239,8 @@ typedef struct clms_cb_t {
   *node_down_list_head; /*NODE_DOWN record - Fix when active node goes
down
  */
   NODE_DOWN_LIST *node_down_list_tail;
+  // Record node id when receive MDS node down  std::set 
+ mds_node_down_list;
   bool is_impl_set;
   bool nid_started; /**< true if started by NID */
   NCS_PATRICIA_TREE iplist; /* To temporarily store ipaddress information
diff --git a/src/clm/clmd/clms_evt.cc b/src/clm/clmd/clms_evt.cc index
c2b83c2..5265002 100644
--- a/src/clm/clmd/clms_evt.cc
+++ b/src/clm/clmd/clms_evt.cc
@@ -943,6 +943,8 @@ static uint32_t proc_mds_node_evt(CLMSV_CLMS_EVT *evt) {
 goto done;
   }
 
+  clms_cb->mds_node_down_list.insert(node_id);
+
   if ((clms_cb->ha_state == SA_AMF_HA_ACTIVE) ||
   (clms_cb->ha_state == SA_AMF_HA_QUIESCED)) {
 clms_track_send_node_down(node);
@@ -1531,19 +1533,24 @@ static uint32_t proc_initialize_msg(CLMS_CB *cb,
CLMSV_CLMS_EVT *evt) {
 
   TRACE_ENTER2("dest %" PRIx64, evt->fr_dest);
 
-  /*Handle the wrap around */
-  if (clms_cb->last_client_id == INT_MAX) clms_cb->last_client_id = 0;
-
-  clms_cb->last_client_id++;
-
   node = clms_node_get_by_id(node_id);
   TRACE("Node id = %d", node_id);
   if (node == nullptr) {
 LOG_IN("Initialize request of client on an unconfigured node: node_id =
%d",
node_id);
 ais_rc = SA_AIS_ERR_UNAVAILABLE;
+std::set::iterator it =
+  clms_cb->mds_node_down_list.find(node_id);
+if (it != clms_cb->mds_node_down_list.end()) {
+  return (uint32_t)ais_rc;
+}
   }
 
+  /*Handle the wrap around */
+  if (clms_cb->last_client_id == INT_MAX) clms_cb->last_client_id = 0;
+
+  clms_cb->last_client_id++;
+
   if ((client = clms_client_new(evt->fr_dest, clms_cb->last_client_id)) ==
   nullptr) {
 TRACE("Creating a new client failed"); @@ -1564,6 +1571,11 @@ static
uint32_t proc_initialize_msg(CLMS_CB *cb, CLMSV_CLMS_EVT *evt) {
 return rc;
   }
 
+  std::set::iterator it = 
+ clms_cb->mds_node_down_list.find(node_id);
+  if (it != clms_cb->mds_node_down_list.end()) {
+clms_cb->mds_node_down_list.erase(it);
+  }
+
   if (node) {
 if (node->member == false) {
   rc = clms_send_is_member_info(clms_cb, node->node_id, node->member,
diff --git a/src/clm/clmd/clms_mds.cc b/src/clm/clmd/clms_mds.cc index
58552cc..833d18c 100644
--- a/src/clm/clmd/clms_mds.cc
+++ b/src/clm/clmd/clms_mds.cc
@@ -1097,7 +1097,7 @@ static uint32_t clms_mds_node_event(struct
ncsmds_callback_info *mds_info) {
 clmsv_evt->info.node_mds_info.node_id =
mds_info->info.node_evt.node_id;
 clmsv_evt->info.node_mds_info.nodeup = SA_TRUE;
 
-rc = m_NCS_IPC_SEND(_cb->mbx, clmsv_evt, NCS_IPC_PRIORITY_HIGH);
+rc = m_NCS_IPC_SEND(_cb->mbx, clmsv_evt, 
+ NCS_IPC_PRIORITY_VERY_HIGH);
 if (rc != NCSCC_RC_SUCCESS) {
   TRACE("IPC send failed %d", rc);
   free(clmsv_evt);
--
2.7.4




___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel


[devel] [PATCH 1/1] clmd: not send sync respond to client if node down [#3004]

2019-01-31 Thread thang.d.nguyen
Clmd will not send sync respond to client if the node that
client resided on down. This will avoid timeout when clmd send via mds.
---
 src/clm/clmd/clms_cb.h   |  3 +++
 src/clm/clmd/clms_evt.cc | 22 +-
 src/clm/clmd/clms_mds.cc |  2 +-
 3 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/src/clm/clmd/clms_cb.h b/src/clm/clmd/clms_cb.h
index 4d7fdc7..637d53a 100644
--- a/src/clm/clmd/clms_cb.h
+++ b/src/clm/clmd/clms_cb.h
@@ -22,6 +22,7 @@
 #include "osaf/config.h"
 #endif
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -238,6 +239,8 @@ typedef struct clms_cb_t {
   *node_down_list_head; /*NODE_DOWN record - Fix when active node goes down
  */
   NODE_DOWN_LIST *node_down_list_tail;
+  // Record node id when receive MDS node down
+  std::set mds_node_down_list;
   bool is_impl_set;
   bool nid_started; /**< true if started by NID */
   NCS_PATRICIA_TREE iplist; /* To temporarily store ipaddress information
diff --git a/src/clm/clmd/clms_evt.cc b/src/clm/clmd/clms_evt.cc
index c2b83c2..5265002 100644
--- a/src/clm/clmd/clms_evt.cc
+++ b/src/clm/clmd/clms_evt.cc
@@ -943,6 +943,8 @@ static uint32_t proc_mds_node_evt(CLMSV_CLMS_EVT *evt) {
 goto done;
   }
 
+  clms_cb->mds_node_down_list.insert(node_id);
+
   if ((clms_cb->ha_state == SA_AMF_HA_ACTIVE) ||
   (clms_cb->ha_state == SA_AMF_HA_QUIESCED)) {
 clms_track_send_node_down(node);
@@ -1531,19 +1533,24 @@ static uint32_t proc_initialize_msg(CLMS_CB *cb, 
CLMSV_CLMS_EVT *evt) {
 
   TRACE_ENTER2("dest %" PRIx64, evt->fr_dest);
 
-  /*Handle the wrap around */
-  if (clms_cb->last_client_id == INT_MAX) clms_cb->last_client_id = 0;
-
-  clms_cb->last_client_id++;
-
   node = clms_node_get_by_id(node_id);
   TRACE("Node id = %d", node_id);
   if (node == nullptr) {
 LOG_IN("Initialize request of client on an unconfigured node: node_id = 
%d",
node_id);
 ais_rc = SA_AIS_ERR_UNAVAILABLE;
+std::set::iterator it =
+  clms_cb->mds_node_down_list.find(node_id);
+if (it != clms_cb->mds_node_down_list.end()) {
+  return (uint32_t)ais_rc;
+}
   }
 
+  /*Handle the wrap around */
+  if (clms_cb->last_client_id == INT_MAX) clms_cb->last_client_id = 0;
+
+  clms_cb->last_client_id++;
+
   if ((client = clms_client_new(evt->fr_dest, clms_cb->last_client_id)) ==
   nullptr) {
 TRACE("Creating a new client failed");
@@ -1564,6 +1571,11 @@ static uint32_t proc_initialize_msg(CLMS_CB *cb, 
CLMSV_CLMS_EVT *evt) {
 return rc;
   }
 
+  std::set::iterator it = clms_cb->mds_node_down_list.find(node_id);
+  if (it != clms_cb->mds_node_down_list.end()) {
+clms_cb->mds_node_down_list.erase(it);
+  }
+
   if (node) {
 if (node->member == false) {
   rc = clms_send_is_member_info(clms_cb, node->node_id, node->member,
diff --git a/src/clm/clmd/clms_mds.cc b/src/clm/clmd/clms_mds.cc
index 58552cc..833d18c 100644
--- a/src/clm/clmd/clms_mds.cc
+++ b/src/clm/clmd/clms_mds.cc
@@ -1097,7 +1097,7 @@ static uint32_t clms_mds_node_event(struct 
ncsmds_callback_info *mds_info) {
 clmsv_evt->info.node_mds_info.node_id = mds_info->info.node_evt.node_id;
 clmsv_evt->info.node_mds_info.nodeup = SA_TRUE;
 
-rc = m_NCS_IPC_SEND(_cb->mbx, clmsv_evt, NCS_IPC_PRIORITY_HIGH);
+rc = m_NCS_IPC_SEND(_cb->mbx, clmsv_evt, NCS_IPC_PRIORITY_VERY_HIGH);
 if (rc != NCSCC_RC_SUCCESS) {
   TRACE("IPC send failed %d", rc);
   free(clmsv_evt);
-- 
2.7.4



___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel


Re: [devel] [PATCH 1/1] clmd: not send sync respond to client if node down [#3004]

2019-01-31 Thread Thang Nguyen
Hi Thuan,

See my comment in line.
Due to use mutext lock can cause the mds thread stuck or process lower.
So I update the solution to NOT use the mutex. Instead of change the node
down priority of MDS node down event 
>From HIGH to VERY_HIGH plus add the set to record the nod_id when receives
MDS node down event.

Please review again in v1.

B.R
/Thang

-Original Message-
From: Tran Thuan  
Sent: Wednesday, January 30, 2019 1:41 PM
To: 'thang.d.nguyen' ;
gary@dektech.com.au; minh.c...@dektech.com.au
Cc: opensaf-devel@lists.sourceforge.net
Subject: RE: [devel] [PATCH 1/1] clmd: not send sync respond to client if
node down [#3004]

Hi Thang,

Some comments inline with [Thuan]

Best Regards,
ThuanTr

-Original Message-
From: thang.d.nguyen 
Sent: Tuesday, January 29, 2019 4:53 AM
To: gary@dektech.com.au; minh.c...@dektech.com.au
Cc: opensaf-devel@lists.sourceforge.net
Subject: [devel] [PATCH 1/1] clmd: not send sync respond to client if node
down [#3004]

clmd will not send sync respond to client if the node that client resided on
down. This will avoid timeout when clmd send via mds.
---
 src/clm/clmd/clms_cb.h|  5 
 src/clm/clmd/clms_evt.cc  | 35 ++-
 src/clm/clmd/clms_evt.h   |  1 +
 src/clm/clmd/clms_main.cc |  4 
 src/clm/clmd/clms_mds.cc  | 61
+++
 5 files changed, 105 insertions(+), 1 deletion(-)

diff --git a/src/clm/clmd/clms_cb.h b/src/clm/clmd/clms_cb.h index
4d7fdc7..6999761 100644
--- a/src/clm/clmd/clms_cb.h
+++ b/src/clm/clmd/clms_cb.h
@@ -22,6 +22,7 @@
 #include "osaf/config.h"
 #endif
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -238,6 +239,8 @@ typedef struct clms_cb_t {
   *node_down_list_head; /*NODE_DOWN record - Fix when active node goes
down
  */
   NODE_DOWN_LIST *node_down_list_tail;
+  // Node down list - Updated by MDS thread  std::list 
+ mds_node_down_list;

[Thuan]: The element is simple and small, I suggest to use
std::set. We can avoid new/delete and SET to avoid duplicate
element
[Thang]: OK.

   bool is_impl_set;
   bool nid_started; /**< true if started by NID */
   NCS_PATRICIA_TREE iplist; /* To temporarily store ipaddress information
@@ -245,6 +248,8 @@ typedef struct clms_cb_t {
 
   /* Mutex protecting shared data used by the scale-out functionality */
   pthread_mutex_t scale_out_data_mutex;
+  /* Mutex protecting shared data used by the delete/add node-id */ 
+ pthread_mutex_t node_down_list_mutex;
   /* Number of occupied indices in the vectors pending_nodes[] and
* pending_node_ids[] */
   size_t no_of_pending_nodes;
diff --git a/src/clm/clmd/clms_evt.cc b/src/clm/clmd/clms_evt.cc index
c2b83c2..08d4acd 100644
--- a/src/clm/clmd/clms_evt.cc
+++ b/src/clm/clmd/clms_evt.cc
@@ -17,7 +17,6 @@
  *
  */
 
-#include "osaf/configmake.h"
 #include 
 #include 
 #include 
@@ -31,6 +30,9 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include "osaf/configmake.h"
 #include "base/logtrace.h"
 #include "base/ncsgl_defs.h"
 #include "base/osaf_utility.h"
@@ -1514,6 +1516,31 @@ static uint32_t proc_node_get_async_msg(CLMS_CB *cb,
CLMSV_CLMS_EVT *evt) {  }
 
 /**
+ * Return true if mds node down exist
+ * @param node id
+ *
+ * @return bool
+ */
+bool clms_is_node_down(uint32_t node_id) {
+  TRACE_ENTER();
+  bool found = false;
+  std::list::iterator it;
+  osaf_mutex_lock_ordie(_cb->node_down_list_mutex);
+
+  for (it = clms_cb->mds_node_down_list.begin();
+it != clms_cb->mds_node_down_list.end(); ++it) {
+if (*(*it) == node_id) {
+  found = true;
+  break;
+}
+  }
+
+  osaf_mutex_unlock_ordie(_cb->node_down_list_mutex);
+  TRACE_LEAVE();
+  return found;
+}
+
+/**
  * Handle a initialize message
  * @param cb
  * @param evt
@@ -1556,6 +1583,12 @@ static uint32_t proc_initialize_msg(CLMS_CB *cb,
CLMSV_CLMS_EVT *evt) {
   if (client != nullptr)
 msg.info.api_resp_info.param.client_id = client->client_id;
 
+  if (clms_is_node_down(node_id) == true) {
+LOG_NO("node_id = %d already down, no need sending sync respond",
node_id);
+if (client != nullptr) clms_client_delete(client->client_id);
+return (uint32_t)ais_rc;
+  }
+
[Thuan] Why don't move this block before clms_client_new() then not need to
clms_client_delete()

   rc = clms_mds_msg_send(cb, , >fr_dest, >mds_ctxt,
  MDS_SEND_PRIORITY_HIGH, NCSMDS_SVC_ID_CLMA);
   if (rc != NCSCC_RC_SUCCESS) {
diff --git a/src/clm/clmd/clms_evt.h b/src/clm/clmd/clms_evt.h index
1005456..ef35cbc 100644
--- a/src/clm/clmd/clms_evt.h
+++ b/src/clm/clmd/clms_evt.h
@@ -92,6 +92,7 @@ extern uint32_t clms_clmresp_ok(CLMS_CB *cb,
CLMS_CLUSTER_NODE *op_node,
 CLMS_TRACK_INFO *trkrec);  extern uint32_t
clms_remove_clma_down_rec(CLMS_CB *cb, MDS_DEST mds_dest);  extern void
clms_remove_node_down_rec(SaClmNodeIdT node_id);
+extern bool clms_is_node_down(SaClmNodeIdT node_id);
 extern 

Re: [devel] [PATCH 1/1] clmd: not send sync respond to client if node down [#3004]

2019-01-29 Thread Tran Thuan
Hi Thang,

Some comments inline with [Thuan]

Best Regards,
ThuanTr

-Original Message-
From: thang.d.nguyen  
Sent: Tuesday, January 29, 2019 4:53 AM
To: gary@dektech.com.au; minh.c...@dektech.com.au
Cc: opensaf-devel@lists.sourceforge.net
Subject: [devel] [PATCH 1/1] clmd: not send sync respond to client if node
down [#3004]

clmd will not send sync respond to client if the node that client resided on
down. This will avoid timeout when clmd send via mds.
---
 src/clm/clmd/clms_cb.h|  5 
 src/clm/clmd/clms_evt.cc  | 35 ++-
 src/clm/clmd/clms_evt.h   |  1 +
 src/clm/clmd/clms_main.cc |  4 
 src/clm/clmd/clms_mds.cc  | 61
+++
 5 files changed, 105 insertions(+), 1 deletion(-)

diff --git a/src/clm/clmd/clms_cb.h b/src/clm/clmd/clms_cb.h index
4d7fdc7..6999761 100644
--- a/src/clm/clmd/clms_cb.h
+++ b/src/clm/clmd/clms_cb.h
@@ -22,6 +22,7 @@
 #include "osaf/config.h"
 #endif
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -238,6 +239,8 @@ typedef struct clms_cb_t {
   *node_down_list_head; /*NODE_DOWN record - Fix when active node goes
down
  */
   NODE_DOWN_LIST *node_down_list_tail;
+  // Node down list - Updated by MDS thread  std::list 
+ mds_node_down_list;

[Thuan]: The element is simple and small, I suggest to use
std::set. We can avoid new/delete and SET to avoid duplicate
element

   bool is_impl_set;
   bool nid_started; /**< true if started by NID */
   NCS_PATRICIA_TREE iplist; /* To temporarily store ipaddress information
@@ -245,6 +248,8 @@ typedef struct clms_cb_t {
 
   /* Mutex protecting shared data used by the scale-out functionality */
   pthread_mutex_t scale_out_data_mutex;
+  /* Mutex protecting shared data used by the delete/add node-id */  
+ pthread_mutex_t node_down_list_mutex;
   /* Number of occupied indices in the vectors pending_nodes[] and
* pending_node_ids[] */
   size_t no_of_pending_nodes;
diff --git a/src/clm/clmd/clms_evt.cc b/src/clm/clmd/clms_evt.cc index
c2b83c2..08d4acd 100644
--- a/src/clm/clmd/clms_evt.cc
+++ b/src/clm/clmd/clms_evt.cc
@@ -17,7 +17,6 @@
  *
  */
 
-#include "osaf/configmake.h"
 #include 
 #include 
 #include 
@@ -31,6 +30,9 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include "osaf/configmake.h"
 #include "base/logtrace.h"
 #include "base/ncsgl_defs.h"
 #include "base/osaf_utility.h"
@@ -1514,6 +1516,31 @@ static uint32_t proc_node_get_async_msg(CLMS_CB *cb,
CLMSV_CLMS_EVT *evt) {  }
 
 /**
+ * Return true if mds node down exist
+ * @param node id
+ *
+ * @return bool
+ */
+bool clms_is_node_down(uint32_t node_id) {
+  TRACE_ENTER();
+  bool found = false;
+  std::list::iterator it;
+  osaf_mutex_lock_ordie(_cb->node_down_list_mutex);
+
+  for (it = clms_cb->mds_node_down_list.begin();
+it != clms_cb->mds_node_down_list.end(); ++it) {
+if (*(*it) == node_id) {
+  found = true;
+  break;
+}
+  }
+
+  osaf_mutex_unlock_ordie(_cb->node_down_list_mutex);
+  TRACE_LEAVE();
+  return found;
+}
+
+/**
  * Handle a initialize message
  * @param cb
  * @param evt
@@ -1556,6 +1583,12 @@ static uint32_t proc_initialize_msg(CLMS_CB *cb,
CLMSV_CLMS_EVT *evt) {
   if (client != nullptr)
 msg.info.api_resp_info.param.client_id = client->client_id;
 
+  if (clms_is_node_down(node_id) == true) {
+LOG_NO("node_id = %d already down, no need sending sync respond",
node_id);
+if (client != nullptr) clms_client_delete(client->client_id);
+return (uint32_t)ais_rc;
+  }
+
[Thuan] Why don't move this block before clms_client_new() then not need to
clms_client_delete()

   rc = clms_mds_msg_send(cb, , >fr_dest, >mds_ctxt,
  MDS_SEND_PRIORITY_HIGH, NCSMDS_SVC_ID_CLMA);
   if (rc != NCSCC_RC_SUCCESS) {
diff --git a/src/clm/clmd/clms_evt.h b/src/clm/clmd/clms_evt.h index
1005456..ef35cbc 100644
--- a/src/clm/clmd/clms_evt.h
+++ b/src/clm/clmd/clms_evt.h
@@ -92,6 +92,7 @@ extern uint32_t clms_clmresp_ok(CLMS_CB *cb,
CLMS_CLUSTER_NODE *op_node,
 CLMS_TRACK_INFO *trkrec);  extern uint32_t
clms_remove_clma_down_rec(CLMS_CB *cb, MDS_DEST mds_dest);  extern void
clms_remove_node_down_rec(SaClmNodeIdT node_id);
+extern bool clms_is_node_down(SaClmNodeIdT node_id);
 extern uint32_t clms_node_add(CLMS_CLUSTER_NODE *node, int i);  extern void
clms_clmresp_error_timeout(CLMS_CB *cb, CLMS_CLUSTER_NODE *node);  extern
bool clms_clma_entry_valid(CLMS_CB *cb, MDS_DEST mds_dest); diff --git
a/src/clm/clmd/clms_main.cc b/src/clm/clmd/clms_main.cc index
ad6e12e..e2c4f21 100644
--- a/src/clm/clmd/clms_main.cc
+++ b/src/clm/clmd/clms_main.cc
@@ -245,6 +245,10 @@ uint32_t clms_cb_init(CLMS_CB *clms_cb) {
   if (pthread_mutex_init(_cb->scale_out_data_mutex, nullptr) != 0) {
 return NCSCC_RC_FAILURE;
   }
+  if (pthread_mutex_init(_cb->node_down_list_mutex, nullptr) != 0) {
+return NCSCC_RC_FAILURE;
+  }
+
   

[devel] [PATCH 1/1] clmd: not send sync respond to client if node down [#3004]

2019-01-29 Thread thang.d.nguyen
clmd will not send sync respond to client if the node
that client resided on down. This will avoid timeout
when clmd send via mds.
---
 src/clm/clmd/clms_cb.h|  5 
 src/clm/clmd/clms_evt.cc  | 35 ++-
 src/clm/clmd/clms_evt.h   |  1 +
 src/clm/clmd/clms_main.cc |  4 
 src/clm/clmd/clms_mds.cc  | 61 +++
 5 files changed, 105 insertions(+), 1 deletion(-)

diff --git a/src/clm/clmd/clms_cb.h b/src/clm/clmd/clms_cb.h
index 4d7fdc7..6999761 100644
--- a/src/clm/clmd/clms_cb.h
+++ b/src/clm/clmd/clms_cb.h
@@ -22,6 +22,7 @@
 #include "osaf/config.h"
 #endif
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -238,6 +239,8 @@ typedef struct clms_cb_t {
   *node_down_list_head; /*NODE_DOWN record - Fix when active node goes down
  */
   NODE_DOWN_LIST *node_down_list_tail;
+  // Node down list - Updated by MDS thread
+  std::list mds_node_down_list;
   bool is_impl_set;
   bool nid_started; /**< true if started by NID */
   NCS_PATRICIA_TREE iplist; /* To temporarily store ipaddress information
@@ -245,6 +248,8 @@ typedef struct clms_cb_t {
 
   /* Mutex protecting shared data used by the scale-out functionality */
   pthread_mutex_t scale_out_data_mutex;
+  /* Mutex protecting shared data used by the delete/add node-id */
+  pthread_mutex_t node_down_list_mutex;
   /* Number of occupied indices in the vectors pending_nodes[] and
* pending_node_ids[] */
   size_t no_of_pending_nodes;
diff --git a/src/clm/clmd/clms_evt.cc b/src/clm/clmd/clms_evt.cc
index c2b83c2..08d4acd 100644
--- a/src/clm/clmd/clms_evt.cc
+++ b/src/clm/clmd/clms_evt.cc
@@ -17,7 +17,6 @@
  *
  */
 
-#include "osaf/configmake.h"
 #include 
 #include 
 #include 
@@ -31,6 +30,9 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include "osaf/configmake.h"
 #include "base/logtrace.h"
 #include "base/ncsgl_defs.h"
 #include "base/osaf_utility.h"
@@ -1514,6 +1516,31 @@ static uint32_t proc_node_get_async_msg(CLMS_CB *cb, 
CLMSV_CLMS_EVT *evt) {
 }
 
 /**
+ * Return true if mds node down exist
+ * @param node id
+ *
+ * @return bool
+ */
+bool clms_is_node_down(uint32_t node_id) {
+  TRACE_ENTER();
+  bool found = false;
+  std::list::iterator it;
+  osaf_mutex_lock_ordie(_cb->node_down_list_mutex);
+
+  for (it = clms_cb->mds_node_down_list.begin();
+it != clms_cb->mds_node_down_list.end(); ++it) {
+if (*(*it) == node_id) {
+  found = true;
+  break;
+}
+  }
+
+  osaf_mutex_unlock_ordie(_cb->node_down_list_mutex);
+  TRACE_LEAVE();
+  return found;
+}
+
+/**
  * Handle a initialize message
  * @param cb
  * @param evt
@@ -1556,6 +1583,12 @@ static uint32_t proc_initialize_msg(CLMS_CB *cb, 
CLMSV_CLMS_EVT *evt) {
   if (client != nullptr)
 msg.info.api_resp_info.param.client_id = client->client_id;
 
+  if (clms_is_node_down(node_id) == true) {
+LOG_NO("node_id = %d already down, no need sending sync respond", node_id);
+if (client != nullptr) clms_client_delete(client->client_id);
+return (uint32_t)ais_rc;
+  }
+
   rc = clms_mds_msg_send(cb, , >fr_dest, >mds_ctxt,
  MDS_SEND_PRIORITY_HIGH, NCSMDS_SVC_ID_CLMA);
   if (rc != NCSCC_RC_SUCCESS) {
diff --git a/src/clm/clmd/clms_evt.h b/src/clm/clmd/clms_evt.h
index 1005456..ef35cbc 100644
--- a/src/clm/clmd/clms_evt.h
+++ b/src/clm/clmd/clms_evt.h
@@ -92,6 +92,7 @@ extern uint32_t clms_clmresp_ok(CLMS_CB *cb, 
CLMS_CLUSTER_NODE *op_node,
 CLMS_TRACK_INFO *trkrec);
 extern uint32_t clms_remove_clma_down_rec(CLMS_CB *cb, MDS_DEST mds_dest);
 extern void clms_remove_node_down_rec(SaClmNodeIdT node_id);
+extern bool clms_is_node_down(SaClmNodeIdT node_id);
 extern uint32_t clms_node_add(CLMS_CLUSTER_NODE *node, int i);
 extern void clms_clmresp_error_timeout(CLMS_CB *cb, CLMS_CLUSTER_NODE *node);
 extern bool clms_clma_entry_valid(CLMS_CB *cb, MDS_DEST mds_dest);
diff --git a/src/clm/clmd/clms_main.cc b/src/clm/clmd/clms_main.cc
index ad6e12e..e2c4f21 100644
--- a/src/clm/clmd/clms_main.cc
+++ b/src/clm/clmd/clms_main.cc
@@ -245,6 +245,10 @@ uint32_t clms_cb_init(CLMS_CB *clms_cb) {
   if (pthread_mutex_init(_cb->scale_out_data_mutex, nullptr) != 0) {
 return NCSCC_RC_FAILURE;
   }
+  if (pthread_mutex_init(_cb->node_down_list_mutex, nullptr) != 0) {
+return NCSCC_RC_FAILURE;
+  }
+
   clms_cb->no_of_pending_nodes = 0;
   clms_cb->no_of_inprogress_nodes = 0;
   for (int i = 0; i != (MAX_PENDING_NODES + 1); ++i) {
diff --git a/src/clm/clmd/clms_mds.cc b/src/clm/clmd/clms_mds.cc
index 58552cc..a9e004b 100644
--- a/src/clm/clmd/clms_mds.cc
+++ b/src/clm/clmd/clms_mds.cc
@@ -18,10 +18,13 @@
 
 #include 
 #include 
+#include 
+#include 
 #include "base/logtrace.h"
 #include "base/ncsencdec_pub.h"
 #include "clm/clmd/clms.h"
 #include "clm/common/clmsv_enc_dec.h"
+#include "base/osaf_utility.h"
 
 #define CLMS_SVC_PVT_SUBPART_VERSION 1
 #define