Re: [devel] [PATCH 1/1] clmd: not send sync respond to client if node down [#3004]
Hi Minh, There is NO node info in data base of CLM in both case node down and node unconfigured. So we can not use it to distingush b/w them. B.R /Thang -Original Message- From: Minh Hon Chau Sent: Wednesday, February 13, 2019 9:49 AM To: Tran Thuan ; 'thang.d.nguyen' ; gary@dektech.com.au Cc: opensaf-devel@lists.sourceforge.net Subject: Re: [PATCH 1/1] clmd: not send sync respond to client if node down [#3004] Hi Thang, The patch looks ok, but I'm thinking of not introducing mds_node_down_list. In SAI-AIS-CLM-B.04.01: "The term unconfigured node is used in this document to designate an execution environment that is not configured to host a CLM node." May we add a check if a node is unconfigured because it's not in ee_lookup, to distinguish with if a node is down? Thanks Minh On 1/2/19 2:34 pm, Tran Thuan wrote: > Hi Thang, > > ACK from me for code review, not tested. > > Best Regards, > ThuanTr > > -Original Message- > From: thang.d.nguyen > Sent: Wednesday, January 30, 2019 1:20 AM > To: gary@dektech.com.au; minh.c...@dektech.com.au; > thuan.t...@dektech.com.au > Cc: opensaf-devel@lists.sourceforge.net; thang.d.nguyen > > Subject: [PATCH 1/1] clmd: not send sync respond to client if node > down [#3004] > > Clmd will not send sync respond to client if the node that client > resided on down. This will avoid timeout when clmd send via mds. > --- > src/clm/clmd/clms_cb.h | 3 +++ > src/clm/clmd/clms_evt.cc | 22 +- > src/clm/clmd/clms_mds.cc | 2 +- > 3 files changed, 21 insertions(+), 6 deletions(-) > > diff --git a/src/clm/clmd/clms_cb.h b/src/clm/clmd/clms_cb.h index > 4d7fdc7..637d53a 100644 > --- a/src/clm/clmd/clms_cb.h > +++ b/src/clm/clmd/clms_cb.h > @@ -22,6 +22,7 @@ > #include "osaf/config.h" > #endif > #include > +#include > #include > #include > #include > @@ -238,6 +239,8 @@ typedef struct clms_cb_t { > *node_down_list_head; /*NODE_DOWN record - Fix when active > node goes down >*/ > NODE_DOWN_LIST *node_down_list_tail; > + // Record node id when receive MDS node down std::set > + mds_node_down_list; > bool is_impl_set; > bool nid_started; /**< true if started by NID */ > NCS_PATRICIA_TREE iplist; /* To temporarily store ipaddress > information diff --git a/src/clm/clmd/clms_evt.cc > b/src/clm/clmd/clms_evt.cc index > c2b83c2..5265002 100644 > --- a/src/clm/clmd/clms_evt.cc > +++ b/src/clm/clmd/clms_evt.cc > @@ -943,6 +943,8 @@ static uint32_t proc_mds_node_evt(CLMSV_CLMS_EVT *evt) { > goto done; > } > > + clms_cb->mds_node_down_list.insert(node_id); > + > if ((clms_cb->ha_state == SA_AMF_HA_ACTIVE) || > (clms_cb->ha_state == SA_AMF_HA_QUIESCED)) { > clms_track_send_node_down(node); @@ -1531,19 +1533,24 @@ static > uint32_t proc_initialize_msg(CLMS_CB *cb, CLMSV_CLMS_EVT *evt) { > > TRACE_ENTER2("dest %" PRIx64, evt->fr_dest); > > - /*Handle the wrap around */ > - if (clms_cb->last_client_id == INT_MAX) clms_cb->last_client_id = > 0; > - > - clms_cb->last_client_id++; > - > node = clms_node_get_by_id(node_id); > TRACE("Node id = %d", node_id); > if (node == nullptr) { > LOG_IN("Initialize request of client on an unconfigured node: > node_id = %d", > node_id); > ais_rc = SA_AIS_ERR_UNAVAILABLE; > +std::set::iterator it = > + clms_cb->mds_node_down_list.find(node_id); > +if (it != clms_cb->mds_node_down_list.end()) { > + return (uint32_t)ais_rc; > +} > } > > + /*Handle the wrap around */ > + if (clms_cb->last_client_id == INT_MAX) clms_cb->last_client_id = > + 0; > + > + clms_cb->last_client_id++; > + > if ((client = clms_client_new(evt->fr_dest, clms_cb->last_client_id)) == > nullptr) { > TRACE("Creating a new client failed"); @@ -1564,6 +1571,11 @@ > static uint32_t proc_initialize_msg(CLMS_CB *cb, CLMSV_CLMS_EVT *evt) { > return rc; > } > > + std::set::iterator it = > + clms_cb->mds_node_down_list.find(node_id); > + if (it != clms_cb->mds_node_down_list.end()) { > +clms_cb->mds_node_down_list.erase(it); > + } > + > if (node) { > if (node->member == false) { > rc = clms_send_is_member_info(clms_cb, node->node_id, > node->member, diff --git a/src/clm/clmd/clms_mds.cc > b/src/clm/clmd/clms_mds.cc index 58552cc..833d18c 100644 > --- a/src/clm/clmd/clms_mds.cc > +++ b/src/clm/clmd/clms_mds.cc > @@ -1097,7 +1097,7 @@ static uint32_t clms_mds_node_event(struct > ncsmds_callback_info *mds_info) { > clmsv_evt->info.node_mds_info.node_id = > mds_info->info.node_evt.node_id; > clmsv_evt->info.node_mds_info.nodeup = SA_TRUE; > > -rc = m_NCS_IPC_SEND(_cb->mbx, clmsv_evt, NCS_IPC_PRIORITY_HIGH); > +rc = m_NCS_IPC_SEND(_cb->mbx, clmsv_evt, > + NCS_IPC_PRIORITY_VERY_HIGH); > if (rc != NCSCC_RC_SUCCESS) { > TRACE("IPC send failed
Re: [devel] [PATCH 1/1] clmd: not send sync respond to client if node down [#3004]
Hi Thang, The patch looks ok, but I'm thinking of not introducing mds_node_down_list. In SAI-AIS-CLM-B.04.01: "The term unconfigured node is used in this document to designate an execution environment that is not configured to host a CLM node." May we add a check if a node is unconfigured because it's not in ee_lookup, to distinguish with if a node is down? Thanks Minh On 1/2/19 2:34 pm, Tran Thuan wrote: Hi Thang, ACK from me for code review, not tested. Best Regards, ThuanTr -Original Message- From: thang.d.nguyen Sent: Wednesday, January 30, 2019 1:20 AM To: gary@dektech.com.au; minh.c...@dektech.com.au; thuan.t...@dektech.com.au Cc: opensaf-devel@lists.sourceforge.net; thang.d.nguyen Subject: [PATCH 1/1] clmd: not send sync respond to client if node down [#3004] Clmd will not send sync respond to client if the node that client resided on down. This will avoid timeout when clmd send via mds. --- src/clm/clmd/clms_cb.h | 3 +++ src/clm/clmd/clms_evt.cc | 22 +- src/clm/clmd/clms_mds.cc | 2 +- 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/src/clm/clmd/clms_cb.h b/src/clm/clmd/clms_cb.h index 4d7fdc7..637d53a 100644 --- a/src/clm/clmd/clms_cb.h +++ b/src/clm/clmd/clms_cb.h @@ -22,6 +22,7 @@ #include "osaf/config.h" #endif #include +#include #include #include #include @@ -238,6 +239,8 @@ typedef struct clms_cb_t { *node_down_list_head; /*NODE_DOWN record - Fix when active node goes down */ NODE_DOWN_LIST *node_down_list_tail; + // Record node id when receive MDS node down std::set + mds_node_down_list; bool is_impl_set; bool nid_started; /**< true if started by NID */ NCS_PATRICIA_TREE iplist; /* To temporarily store ipaddress information diff --git a/src/clm/clmd/clms_evt.cc b/src/clm/clmd/clms_evt.cc index c2b83c2..5265002 100644 --- a/src/clm/clmd/clms_evt.cc +++ b/src/clm/clmd/clms_evt.cc @@ -943,6 +943,8 @@ static uint32_t proc_mds_node_evt(CLMSV_CLMS_EVT *evt) { goto done; } + clms_cb->mds_node_down_list.insert(node_id); + if ((clms_cb->ha_state == SA_AMF_HA_ACTIVE) || (clms_cb->ha_state == SA_AMF_HA_QUIESCED)) { clms_track_send_node_down(node); @@ -1531,19 +1533,24 @@ static uint32_t proc_initialize_msg(CLMS_CB *cb, CLMSV_CLMS_EVT *evt) { TRACE_ENTER2("dest %" PRIx64, evt->fr_dest); - /*Handle the wrap around */ - if (clms_cb->last_client_id == INT_MAX) clms_cb->last_client_id = 0; - - clms_cb->last_client_id++; - node = clms_node_get_by_id(node_id); TRACE("Node id = %d", node_id); if (node == nullptr) { LOG_IN("Initialize request of client on an unconfigured node: node_id = %d", node_id); ais_rc = SA_AIS_ERR_UNAVAILABLE; +std::set::iterator it = + clms_cb->mds_node_down_list.find(node_id); +if (it != clms_cb->mds_node_down_list.end()) { + return (uint32_t)ais_rc; +} } + /*Handle the wrap around */ + if (clms_cb->last_client_id == INT_MAX) clms_cb->last_client_id = 0; + + clms_cb->last_client_id++; + if ((client = clms_client_new(evt->fr_dest, clms_cb->last_client_id)) == nullptr) { TRACE("Creating a new client failed"); @@ -1564,6 +1571,11 @@ static uint32_t proc_initialize_msg(CLMS_CB *cb, CLMSV_CLMS_EVT *evt) { return rc; } + std::set::iterator it = + clms_cb->mds_node_down_list.find(node_id); + if (it != clms_cb->mds_node_down_list.end()) { +clms_cb->mds_node_down_list.erase(it); + } + if (node) { if (node->member == false) { rc = clms_send_is_member_info(clms_cb, node->node_id, node->member, diff --git a/src/clm/clmd/clms_mds.cc b/src/clm/clmd/clms_mds.cc index 58552cc..833d18c 100644 --- a/src/clm/clmd/clms_mds.cc +++ b/src/clm/clmd/clms_mds.cc @@ -1097,7 +1097,7 @@ static uint32_t clms_mds_node_event(struct ncsmds_callback_info *mds_info) { clmsv_evt->info.node_mds_info.node_id = mds_info->info.node_evt.node_id; clmsv_evt->info.node_mds_info.nodeup = SA_TRUE; -rc = m_NCS_IPC_SEND(_cb->mbx, clmsv_evt, NCS_IPC_PRIORITY_HIGH); +rc = m_NCS_IPC_SEND(_cb->mbx, clmsv_evt, + NCS_IPC_PRIORITY_VERY_HIGH); if (rc != NCSCC_RC_SUCCESS) { TRACE("IPC send failed %d", rc); free(clmsv_evt); -- 2.7.4 ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] clmd: not send sync respond to client if node down [#3004]
Hi Thang, ACK from me for code review, not tested. Best Regards, ThuanTr -Original Message- From: thang.d.nguyen Sent: Wednesday, January 30, 2019 1:20 AM To: gary@dektech.com.au; minh.c...@dektech.com.au; thuan.t...@dektech.com.au Cc: opensaf-devel@lists.sourceforge.net; thang.d.nguyen Subject: [PATCH 1/1] clmd: not send sync respond to client if node down [#3004] Clmd will not send sync respond to client if the node that client resided on down. This will avoid timeout when clmd send via mds. --- src/clm/clmd/clms_cb.h | 3 +++ src/clm/clmd/clms_evt.cc | 22 +- src/clm/clmd/clms_mds.cc | 2 +- 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/src/clm/clmd/clms_cb.h b/src/clm/clmd/clms_cb.h index 4d7fdc7..637d53a 100644 --- a/src/clm/clmd/clms_cb.h +++ b/src/clm/clmd/clms_cb.h @@ -22,6 +22,7 @@ #include "osaf/config.h" #endif #include +#include #include #include #include @@ -238,6 +239,8 @@ typedef struct clms_cb_t { *node_down_list_head; /*NODE_DOWN record - Fix when active node goes down */ NODE_DOWN_LIST *node_down_list_tail; + // Record node id when receive MDS node down std::set + mds_node_down_list; bool is_impl_set; bool nid_started; /**< true if started by NID */ NCS_PATRICIA_TREE iplist; /* To temporarily store ipaddress information diff --git a/src/clm/clmd/clms_evt.cc b/src/clm/clmd/clms_evt.cc index c2b83c2..5265002 100644 --- a/src/clm/clmd/clms_evt.cc +++ b/src/clm/clmd/clms_evt.cc @@ -943,6 +943,8 @@ static uint32_t proc_mds_node_evt(CLMSV_CLMS_EVT *evt) { goto done; } + clms_cb->mds_node_down_list.insert(node_id); + if ((clms_cb->ha_state == SA_AMF_HA_ACTIVE) || (clms_cb->ha_state == SA_AMF_HA_QUIESCED)) { clms_track_send_node_down(node); @@ -1531,19 +1533,24 @@ static uint32_t proc_initialize_msg(CLMS_CB *cb, CLMSV_CLMS_EVT *evt) { TRACE_ENTER2("dest %" PRIx64, evt->fr_dest); - /*Handle the wrap around */ - if (clms_cb->last_client_id == INT_MAX) clms_cb->last_client_id = 0; - - clms_cb->last_client_id++; - node = clms_node_get_by_id(node_id); TRACE("Node id = %d", node_id); if (node == nullptr) { LOG_IN("Initialize request of client on an unconfigured node: node_id = %d", node_id); ais_rc = SA_AIS_ERR_UNAVAILABLE; +std::set::iterator it = + clms_cb->mds_node_down_list.find(node_id); +if (it != clms_cb->mds_node_down_list.end()) { + return (uint32_t)ais_rc; +} } + /*Handle the wrap around */ + if (clms_cb->last_client_id == INT_MAX) clms_cb->last_client_id = 0; + + clms_cb->last_client_id++; + if ((client = clms_client_new(evt->fr_dest, clms_cb->last_client_id)) == nullptr) { TRACE("Creating a new client failed"); @@ -1564,6 +1571,11 @@ static uint32_t proc_initialize_msg(CLMS_CB *cb, CLMSV_CLMS_EVT *evt) { return rc; } + std::set::iterator it = + clms_cb->mds_node_down_list.find(node_id); + if (it != clms_cb->mds_node_down_list.end()) { +clms_cb->mds_node_down_list.erase(it); + } + if (node) { if (node->member == false) { rc = clms_send_is_member_info(clms_cb, node->node_id, node->member, diff --git a/src/clm/clmd/clms_mds.cc b/src/clm/clmd/clms_mds.cc index 58552cc..833d18c 100644 --- a/src/clm/clmd/clms_mds.cc +++ b/src/clm/clmd/clms_mds.cc @@ -1097,7 +1097,7 @@ static uint32_t clms_mds_node_event(struct ncsmds_callback_info *mds_info) { clmsv_evt->info.node_mds_info.node_id = mds_info->info.node_evt.node_id; clmsv_evt->info.node_mds_info.nodeup = SA_TRUE; -rc = m_NCS_IPC_SEND(_cb->mbx, clmsv_evt, NCS_IPC_PRIORITY_HIGH); +rc = m_NCS_IPC_SEND(_cb->mbx, clmsv_evt, + NCS_IPC_PRIORITY_VERY_HIGH); if (rc != NCSCC_RC_SUCCESS) { TRACE("IPC send failed %d", rc); free(clmsv_evt); -- 2.7.4 ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
[devel] [PATCH 1/1] clmd: not send sync respond to client if node down [#3004]
Clmd will not send sync respond to client if the node that client resided on down. This will avoid timeout when clmd send via mds. --- src/clm/clmd/clms_cb.h | 3 +++ src/clm/clmd/clms_evt.cc | 22 +- src/clm/clmd/clms_mds.cc | 2 +- 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/src/clm/clmd/clms_cb.h b/src/clm/clmd/clms_cb.h index 4d7fdc7..637d53a 100644 --- a/src/clm/clmd/clms_cb.h +++ b/src/clm/clmd/clms_cb.h @@ -22,6 +22,7 @@ #include "osaf/config.h" #endif #include +#include #include #include #include @@ -238,6 +239,8 @@ typedef struct clms_cb_t { *node_down_list_head; /*NODE_DOWN record - Fix when active node goes down */ NODE_DOWN_LIST *node_down_list_tail; + // Record node id when receive MDS node down + std::set mds_node_down_list; bool is_impl_set; bool nid_started; /**< true if started by NID */ NCS_PATRICIA_TREE iplist; /* To temporarily store ipaddress information diff --git a/src/clm/clmd/clms_evt.cc b/src/clm/clmd/clms_evt.cc index c2b83c2..5265002 100644 --- a/src/clm/clmd/clms_evt.cc +++ b/src/clm/clmd/clms_evt.cc @@ -943,6 +943,8 @@ static uint32_t proc_mds_node_evt(CLMSV_CLMS_EVT *evt) { goto done; } + clms_cb->mds_node_down_list.insert(node_id); + if ((clms_cb->ha_state == SA_AMF_HA_ACTIVE) || (clms_cb->ha_state == SA_AMF_HA_QUIESCED)) { clms_track_send_node_down(node); @@ -1531,19 +1533,24 @@ static uint32_t proc_initialize_msg(CLMS_CB *cb, CLMSV_CLMS_EVT *evt) { TRACE_ENTER2("dest %" PRIx64, evt->fr_dest); - /*Handle the wrap around */ - if (clms_cb->last_client_id == INT_MAX) clms_cb->last_client_id = 0; - - clms_cb->last_client_id++; - node = clms_node_get_by_id(node_id); TRACE("Node id = %d", node_id); if (node == nullptr) { LOG_IN("Initialize request of client on an unconfigured node: node_id = %d", node_id); ais_rc = SA_AIS_ERR_UNAVAILABLE; +std::set::iterator it = + clms_cb->mds_node_down_list.find(node_id); +if (it != clms_cb->mds_node_down_list.end()) { + return (uint32_t)ais_rc; +} } + /*Handle the wrap around */ + if (clms_cb->last_client_id == INT_MAX) clms_cb->last_client_id = 0; + + clms_cb->last_client_id++; + if ((client = clms_client_new(evt->fr_dest, clms_cb->last_client_id)) == nullptr) { TRACE("Creating a new client failed"); @@ -1564,6 +1571,11 @@ static uint32_t proc_initialize_msg(CLMS_CB *cb, CLMSV_CLMS_EVT *evt) { return rc; } + std::set::iterator it = clms_cb->mds_node_down_list.find(node_id); + if (it != clms_cb->mds_node_down_list.end()) { +clms_cb->mds_node_down_list.erase(it); + } + if (node) { if (node->member == false) { rc = clms_send_is_member_info(clms_cb, node->node_id, node->member, diff --git a/src/clm/clmd/clms_mds.cc b/src/clm/clmd/clms_mds.cc index 58552cc..833d18c 100644 --- a/src/clm/clmd/clms_mds.cc +++ b/src/clm/clmd/clms_mds.cc @@ -1097,7 +1097,7 @@ static uint32_t clms_mds_node_event(struct ncsmds_callback_info *mds_info) { clmsv_evt->info.node_mds_info.node_id = mds_info->info.node_evt.node_id; clmsv_evt->info.node_mds_info.nodeup = SA_TRUE; -rc = m_NCS_IPC_SEND(_cb->mbx, clmsv_evt, NCS_IPC_PRIORITY_HIGH); +rc = m_NCS_IPC_SEND(_cb->mbx, clmsv_evt, NCS_IPC_PRIORITY_VERY_HIGH); if (rc != NCSCC_RC_SUCCESS) { TRACE("IPC send failed %d", rc); free(clmsv_evt); -- 2.7.4 ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] clmd: not send sync respond to client if node down [#3004]
Hi Thuan, See my comment in line. Due to use mutext lock can cause the mds thread stuck or process lower. So I update the solution to NOT use the mutex. Instead of change the node down priority of MDS node down event >From HIGH to VERY_HIGH plus add the set to record the nod_id when receives MDS node down event. Please review again in v1. B.R /Thang -Original Message- From: Tran Thuan Sent: Wednesday, January 30, 2019 1:41 PM To: 'thang.d.nguyen' ; gary@dektech.com.au; minh.c...@dektech.com.au Cc: opensaf-devel@lists.sourceforge.net Subject: RE: [devel] [PATCH 1/1] clmd: not send sync respond to client if node down [#3004] Hi Thang, Some comments inline with [Thuan] Best Regards, ThuanTr -Original Message- From: thang.d.nguyen Sent: Tuesday, January 29, 2019 4:53 AM To: gary@dektech.com.au; minh.c...@dektech.com.au Cc: opensaf-devel@lists.sourceforge.net Subject: [devel] [PATCH 1/1] clmd: not send sync respond to client if node down [#3004] clmd will not send sync respond to client if the node that client resided on down. This will avoid timeout when clmd send via mds. --- src/clm/clmd/clms_cb.h| 5 src/clm/clmd/clms_evt.cc | 35 ++- src/clm/clmd/clms_evt.h | 1 + src/clm/clmd/clms_main.cc | 4 src/clm/clmd/clms_mds.cc | 61 +++ 5 files changed, 105 insertions(+), 1 deletion(-) diff --git a/src/clm/clmd/clms_cb.h b/src/clm/clmd/clms_cb.h index 4d7fdc7..6999761 100644 --- a/src/clm/clmd/clms_cb.h +++ b/src/clm/clmd/clms_cb.h @@ -22,6 +22,7 @@ #include "osaf/config.h" #endif #include +#include #include #include #include @@ -238,6 +239,8 @@ typedef struct clms_cb_t { *node_down_list_head; /*NODE_DOWN record - Fix when active node goes down */ NODE_DOWN_LIST *node_down_list_tail; + // Node down list - Updated by MDS thread std::list + mds_node_down_list; [Thuan]: The element is simple and small, I suggest to use std::set. We can avoid new/delete and SET to avoid duplicate element [Thang]: OK. bool is_impl_set; bool nid_started; /**< true if started by NID */ NCS_PATRICIA_TREE iplist; /* To temporarily store ipaddress information @@ -245,6 +248,8 @@ typedef struct clms_cb_t { /* Mutex protecting shared data used by the scale-out functionality */ pthread_mutex_t scale_out_data_mutex; + /* Mutex protecting shared data used by the delete/add node-id */ + pthread_mutex_t node_down_list_mutex; /* Number of occupied indices in the vectors pending_nodes[] and * pending_node_ids[] */ size_t no_of_pending_nodes; diff --git a/src/clm/clmd/clms_evt.cc b/src/clm/clmd/clms_evt.cc index c2b83c2..08d4acd 100644 --- a/src/clm/clmd/clms_evt.cc +++ b/src/clm/clmd/clms_evt.cc @@ -17,7 +17,6 @@ * */ -#include "osaf/configmake.h" #include #include #include @@ -31,6 +30,9 @@ #include #include #include +#include +#include +#include "osaf/configmake.h" #include "base/logtrace.h" #include "base/ncsgl_defs.h" #include "base/osaf_utility.h" @@ -1514,6 +1516,31 @@ static uint32_t proc_node_get_async_msg(CLMS_CB *cb, CLMSV_CLMS_EVT *evt) { } /** + * Return true if mds node down exist + * @param node id + * + * @return bool + */ +bool clms_is_node_down(uint32_t node_id) { + TRACE_ENTER(); + bool found = false; + std::list::iterator it; + osaf_mutex_lock_ordie(_cb->node_down_list_mutex); + + for (it = clms_cb->mds_node_down_list.begin(); +it != clms_cb->mds_node_down_list.end(); ++it) { +if (*(*it) == node_id) { + found = true; + break; +} + } + + osaf_mutex_unlock_ordie(_cb->node_down_list_mutex); + TRACE_LEAVE(); + return found; +} + +/** * Handle a initialize message * @param cb * @param evt @@ -1556,6 +1583,12 @@ static uint32_t proc_initialize_msg(CLMS_CB *cb, CLMSV_CLMS_EVT *evt) { if (client != nullptr) msg.info.api_resp_info.param.client_id = client->client_id; + if (clms_is_node_down(node_id) == true) { +LOG_NO("node_id = %d already down, no need sending sync respond", node_id); +if (client != nullptr) clms_client_delete(client->client_id); +return (uint32_t)ais_rc; + } + [Thuan] Why don't move this block before clms_client_new() then not need to clms_client_delete() rc = clms_mds_msg_send(cb, , >fr_dest, >mds_ctxt, MDS_SEND_PRIORITY_HIGH, NCSMDS_SVC_ID_CLMA); if (rc != NCSCC_RC_SUCCESS) { diff --git a/src/clm/clmd/clms_evt.h b/src/clm/clmd/clms_evt.h index 1005456..ef35cbc 100644 --- a/src/clm/clmd/clms_evt.h +++ b/src/clm/clmd/clms_evt.h @@ -92,6 +92,7 @@ extern uint32_t clms_clmresp_ok(CLMS_CB *cb, CLMS_CLUSTER_NODE *op_node, CLMS_TRACK_INFO *trkrec); extern uint32_t clms_remove_clma_down_rec(CLMS_CB *cb, MDS_DEST mds_dest); extern void clms_remove_node_down_rec(SaClmNodeIdT node_id); +extern bool clms_is_node_down(SaClmNodeIdT node_id); extern
Re: [devel] [PATCH 1/1] clmd: not send sync respond to client if node down [#3004]
Hi Thang, Some comments inline with [Thuan] Best Regards, ThuanTr -Original Message- From: thang.d.nguyen Sent: Tuesday, January 29, 2019 4:53 AM To: gary@dektech.com.au; minh.c...@dektech.com.au Cc: opensaf-devel@lists.sourceforge.net Subject: [devel] [PATCH 1/1] clmd: not send sync respond to client if node down [#3004] clmd will not send sync respond to client if the node that client resided on down. This will avoid timeout when clmd send via mds. --- src/clm/clmd/clms_cb.h| 5 src/clm/clmd/clms_evt.cc | 35 ++- src/clm/clmd/clms_evt.h | 1 + src/clm/clmd/clms_main.cc | 4 src/clm/clmd/clms_mds.cc | 61 +++ 5 files changed, 105 insertions(+), 1 deletion(-) diff --git a/src/clm/clmd/clms_cb.h b/src/clm/clmd/clms_cb.h index 4d7fdc7..6999761 100644 --- a/src/clm/clmd/clms_cb.h +++ b/src/clm/clmd/clms_cb.h @@ -22,6 +22,7 @@ #include "osaf/config.h" #endif #include +#include #include #include #include @@ -238,6 +239,8 @@ typedef struct clms_cb_t { *node_down_list_head; /*NODE_DOWN record - Fix when active node goes down */ NODE_DOWN_LIST *node_down_list_tail; + // Node down list - Updated by MDS thread std::list + mds_node_down_list; [Thuan]: The element is simple and small, I suggest to use std::set. We can avoid new/delete and SET to avoid duplicate element bool is_impl_set; bool nid_started; /**< true if started by NID */ NCS_PATRICIA_TREE iplist; /* To temporarily store ipaddress information @@ -245,6 +248,8 @@ typedef struct clms_cb_t { /* Mutex protecting shared data used by the scale-out functionality */ pthread_mutex_t scale_out_data_mutex; + /* Mutex protecting shared data used by the delete/add node-id */ + pthread_mutex_t node_down_list_mutex; /* Number of occupied indices in the vectors pending_nodes[] and * pending_node_ids[] */ size_t no_of_pending_nodes; diff --git a/src/clm/clmd/clms_evt.cc b/src/clm/clmd/clms_evt.cc index c2b83c2..08d4acd 100644 --- a/src/clm/clmd/clms_evt.cc +++ b/src/clm/clmd/clms_evt.cc @@ -17,7 +17,6 @@ * */ -#include "osaf/configmake.h" #include #include #include @@ -31,6 +30,9 @@ #include #include #include +#include +#include +#include "osaf/configmake.h" #include "base/logtrace.h" #include "base/ncsgl_defs.h" #include "base/osaf_utility.h" @@ -1514,6 +1516,31 @@ static uint32_t proc_node_get_async_msg(CLMS_CB *cb, CLMSV_CLMS_EVT *evt) { } /** + * Return true if mds node down exist + * @param node id + * + * @return bool + */ +bool clms_is_node_down(uint32_t node_id) { + TRACE_ENTER(); + bool found = false; + std::list::iterator it; + osaf_mutex_lock_ordie(_cb->node_down_list_mutex); + + for (it = clms_cb->mds_node_down_list.begin(); +it != clms_cb->mds_node_down_list.end(); ++it) { +if (*(*it) == node_id) { + found = true; + break; +} + } + + osaf_mutex_unlock_ordie(_cb->node_down_list_mutex); + TRACE_LEAVE(); + return found; +} + +/** * Handle a initialize message * @param cb * @param evt @@ -1556,6 +1583,12 @@ static uint32_t proc_initialize_msg(CLMS_CB *cb, CLMSV_CLMS_EVT *evt) { if (client != nullptr) msg.info.api_resp_info.param.client_id = client->client_id; + if (clms_is_node_down(node_id) == true) { +LOG_NO("node_id = %d already down, no need sending sync respond", node_id); +if (client != nullptr) clms_client_delete(client->client_id); +return (uint32_t)ais_rc; + } + [Thuan] Why don't move this block before clms_client_new() then not need to clms_client_delete() rc = clms_mds_msg_send(cb, , >fr_dest, >mds_ctxt, MDS_SEND_PRIORITY_HIGH, NCSMDS_SVC_ID_CLMA); if (rc != NCSCC_RC_SUCCESS) { diff --git a/src/clm/clmd/clms_evt.h b/src/clm/clmd/clms_evt.h index 1005456..ef35cbc 100644 --- a/src/clm/clmd/clms_evt.h +++ b/src/clm/clmd/clms_evt.h @@ -92,6 +92,7 @@ extern uint32_t clms_clmresp_ok(CLMS_CB *cb, CLMS_CLUSTER_NODE *op_node, CLMS_TRACK_INFO *trkrec); extern uint32_t clms_remove_clma_down_rec(CLMS_CB *cb, MDS_DEST mds_dest); extern void clms_remove_node_down_rec(SaClmNodeIdT node_id); +extern bool clms_is_node_down(SaClmNodeIdT node_id); extern uint32_t clms_node_add(CLMS_CLUSTER_NODE *node, int i); extern void clms_clmresp_error_timeout(CLMS_CB *cb, CLMS_CLUSTER_NODE *node); extern bool clms_clma_entry_valid(CLMS_CB *cb, MDS_DEST mds_dest); diff --git a/src/clm/clmd/clms_main.cc b/src/clm/clmd/clms_main.cc index ad6e12e..e2c4f21 100644 --- a/src/clm/clmd/clms_main.cc +++ b/src/clm/clmd/clms_main.cc @@ -245,6 +245,10 @@ uint32_t clms_cb_init(CLMS_CB *clms_cb) { if (pthread_mutex_init(_cb->scale_out_data_mutex, nullptr) != 0) { return NCSCC_RC_FAILURE; } + if (pthread_mutex_init(_cb->node_down_list_mutex, nullptr) != 0) { +return NCSCC_RC_FAILURE; + } +
[devel] [PATCH 1/1] clmd: not send sync respond to client if node down [#3004]
clmd will not send sync respond to client if the node that client resided on down. This will avoid timeout when clmd send via mds. --- src/clm/clmd/clms_cb.h| 5 src/clm/clmd/clms_evt.cc | 35 ++- src/clm/clmd/clms_evt.h | 1 + src/clm/clmd/clms_main.cc | 4 src/clm/clmd/clms_mds.cc | 61 +++ 5 files changed, 105 insertions(+), 1 deletion(-) diff --git a/src/clm/clmd/clms_cb.h b/src/clm/clmd/clms_cb.h index 4d7fdc7..6999761 100644 --- a/src/clm/clmd/clms_cb.h +++ b/src/clm/clmd/clms_cb.h @@ -22,6 +22,7 @@ #include "osaf/config.h" #endif #include +#include #include #include #include @@ -238,6 +239,8 @@ typedef struct clms_cb_t { *node_down_list_head; /*NODE_DOWN record - Fix when active node goes down */ NODE_DOWN_LIST *node_down_list_tail; + // Node down list - Updated by MDS thread + std::list mds_node_down_list; bool is_impl_set; bool nid_started; /**< true if started by NID */ NCS_PATRICIA_TREE iplist; /* To temporarily store ipaddress information @@ -245,6 +248,8 @@ typedef struct clms_cb_t { /* Mutex protecting shared data used by the scale-out functionality */ pthread_mutex_t scale_out_data_mutex; + /* Mutex protecting shared data used by the delete/add node-id */ + pthread_mutex_t node_down_list_mutex; /* Number of occupied indices in the vectors pending_nodes[] and * pending_node_ids[] */ size_t no_of_pending_nodes; diff --git a/src/clm/clmd/clms_evt.cc b/src/clm/clmd/clms_evt.cc index c2b83c2..08d4acd 100644 --- a/src/clm/clmd/clms_evt.cc +++ b/src/clm/clmd/clms_evt.cc @@ -17,7 +17,6 @@ * */ -#include "osaf/configmake.h" #include #include #include @@ -31,6 +30,9 @@ #include #include #include +#include +#include +#include "osaf/configmake.h" #include "base/logtrace.h" #include "base/ncsgl_defs.h" #include "base/osaf_utility.h" @@ -1514,6 +1516,31 @@ static uint32_t proc_node_get_async_msg(CLMS_CB *cb, CLMSV_CLMS_EVT *evt) { } /** + * Return true if mds node down exist + * @param node id + * + * @return bool + */ +bool clms_is_node_down(uint32_t node_id) { + TRACE_ENTER(); + bool found = false; + std::list::iterator it; + osaf_mutex_lock_ordie(_cb->node_down_list_mutex); + + for (it = clms_cb->mds_node_down_list.begin(); +it != clms_cb->mds_node_down_list.end(); ++it) { +if (*(*it) == node_id) { + found = true; + break; +} + } + + osaf_mutex_unlock_ordie(_cb->node_down_list_mutex); + TRACE_LEAVE(); + return found; +} + +/** * Handle a initialize message * @param cb * @param evt @@ -1556,6 +1583,12 @@ static uint32_t proc_initialize_msg(CLMS_CB *cb, CLMSV_CLMS_EVT *evt) { if (client != nullptr) msg.info.api_resp_info.param.client_id = client->client_id; + if (clms_is_node_down(node_id) == true) { +LOG_NO("node_id = %d already down, no need sending sync respond", node_id); +if (client != nullptr) clms_client_delete(client->client_id); +return (uint32_t)ais_rc; + } + rc = clms_mds_msg_send(cb, , >fr_dest, >mds_ctxt, MDS_SEND_PRIORITY_HIGH, NCSMDS_SVC_ID_CLMA); if (rc != NCSCC_RC_SUCCESS) { diff --git a/src/clm/clmd/clms_evt.h b/src/clm/clmd/clms_evt.h index 1005456..ef35cbc 100644 --- a/src/clm/clmd/clms_evt.h +++ b/src/clm/clmd/clms_evt.h @@ -92,6 +92,7 @@ extern uint32_t clms_clmresp_ok(CLMS_CB *cb, CLMS_CLUSTER_NODE *op_node, CLMS_TRACK_INFO *trkrec); extern uint32_t clms_remove_clma_down_rec(CLMS_CB *cb, MDS_DEST mds_dest); extern void clms_remove_node_down_rec(SaClmNodeIdT node_id); +extern bool clms_is_node_down(SaClmNodeIdT node_id); extern uint32_t clms_node_add(CLMS_CLUSTER_NODE *node, int i); extern void clms_clmresp_error_timeout(CLMS_CB *cb, CLMS_CLUSTER_NODE *node); extern bool clms_clma_entry_valid(CLMS_CB *cb, MDS_DEST mds_dest); diff --git a/src/clm/clmd/clms_main.cc b/src/clm/clmd/clms_main.cc index ad6e12e..e2c4f21 100644 --- a/src/clm/clmd/clms_main.cc +++ b/src/clm/clmd/clms_main.cc @@ -245,6 +245,10 @@ uint32_t clms_cb_init(CLMS_CB *clms_cb) { if (pthread_mutex_init(_cb->scale_out_data_mutex, nullptr) != 0) { return NCSCC_RC_FAILURE; } + if (pthread_mutex_init(_cb->node_down_list_mutex, nullptr) != 0) { +return NCSCC_RC_FAILURE; + } + clms_cb->no_of_pending_nodes = 0; clms_cb->no_of_inprogress_nodes = 0; for (int i = 0; i != (MAX_PENDING_NODES + 1); ++i) { diff --git a/src/clm/clmd/clms_mds.cc b/src/clm/clmd/clms_mds.cc index 58552cc..a9e004b 100644 --- a/src/clm/clmd/clms_mds.cc +++ b/src/clm/clmd/clms_mds.cc @@ -18,10 +18,13 @@ #include #include +#include +#include #include "base/logtrace.h" #include "base/ncsencdec_pub.h" #include "clm/clmd/clms.h" #include "clm/common/clmsv_enc_dec.h" +#include "base/osaf_utility.h" #define CLMS_SVC_PVT_SUBPART_VERSION 1 #define