Hi Lennart, Thanks for the comments.
One of your major comment is `lga_hdl_rec->is_stale_client` & `cb->clm_node_state` is not protected in lga_mds.c , but if you see the lga_mds_rcv() , lga_lgs_msg_proc() it self is protected , so all the chances in lga_lgs_msg_proc() are protected. I understand that the existing code is little bit miss leading. The other comments I will address in V3 Patch. ========================================================================== static uint32_t lga_mds_rcv(struct ncsmds_callback_info *mds_cb_info) { lgsv_msg_t *lgsv_msg = (lgsv_msg_t *)mds_cb_info->info.receive.i_msg; uint32_t rc; osaf_mutex_lock_ordie(&lga_cb.cb_lock); /* process the message */ rc = lga_lgs_msg_proc(&lga_cb, lgsv_msg, mds_cb_info->info.receive.i_priority); if (rc != NCSCC_RC_SUCCESS) { TRACE_2("lga_lgs_msg_proc returned: %d", rc); } osaf_mutex_unlock_ordie(&lga_cb.cb_lock); return rc; } ========================================================================== -AVM On 8/2/2016 4:21 PM, Lennart Lund wrote: > Hi > > My comments [Lennart] > > I will continue reviewing patch 2 > > Thanks > Lennart > >> -----Original Message----- >> From: mahesh.va...@oracle.com [mailto:mahesh.va...@oracle.com] >> Sent: den 2 augusti 2016 07:18 >> To: Vu Minh Nguyen <vu.m.ngu...@dektech.com.au>; Lennart Lund >> <lennart.l...@ericsson.com>; Anders Widell <anders.wid...@ericsson.com> >> Cc: opensaf-devel@lists.sourceforge.net >> Subject: [PATCH 1 of 3] lga: agent Cluster Membership (CLM) integration >> [#1638] V2 >> >> osaf/libs/agents/saf/lga/lga.h | 2 + >> osaf/libs/agents/saf/lga/lga_api.c | 47 ++++++++++++++++++++ >> osaf/libs/agents/saf/lga/lga_mds.c | 69 >> ++++++++++++++++++++++++++++++ >> osaf/libs/agents/saf/lga/lga_util.c | 2 + >> osaf/libs/common/logsv/include/lgsv_defs.h | 6 ++- >> osaf/libs/common/logsv/include/lgsv_msg.h | 17 +++++- >> 6 files changed, 138 insertions(+), 5 deletions(-) >> >> >> V2 patch: >> >> Incorporated Anders Widell and Vu review comments , >> for more details see the review comments posted on V1 patch. >> Description: >> >> Form CLM integration is supported from Log Service A.02.02. >> >> At-least a A.02.02 LGA client will check CLM membership status of client's >> node. >> old LGA clients A.02.01 are always clm member. >> >> This patch enhanced the log service for Unavailability of the Log Service API >> on a >> Non-Member Node which will fail with SA_AIS_ERR_UNAVAILABLE. >> >> After this patch the Log Service does not provide service to processes on >> cluster nodes that are not >> in the cluster membership. >> >> If the node rejoins the cluster membership, processes executing on the >> node will be >> able to reinitialize new library handles and use the entire set of Log >> Service >> APIs that >> operate on these new handles; however, invocation of APIs that operate on >> handles >> acquired by any process before the node left the membership will continue >> to fail with >> SA_AIS_ERR_UNAVAILABLE (or with the special treatment described above >> for >> asynchronous calls) with the exception of saLogFinalize(), which is used to >> free the >> library handles and all resources associated with these handles. Hence, it is >> recommended >> for the processes to finalize the library handles as soon as the processes >> detect that the node left the membership. >> >> Detailed README will be provide soon. >> >> Following are expected Log Service API behavior : >> >> Case1: On Non-Member Node, Log Service API will fail with code >> SA_AIS_ERR_UNAVAILABLE (31) >> Case2: On Member Node after recovered from Non-Member Node, Log >> Service API will fail with code SA_AIS_ERR_UNAVAILABLE (31) >> Case3: Non-Member Node + (Headless) Log Service API will fail with code >> SA_AIS_ERR_UNAVAILABLE (31) >> Case4: On Non-Member Node + (Headless) + (Head Joined) Log Service API >> will fail with code SA_AIS_ERR_UNAVAILABLE (31) > [Lennart] I assume you will move this info about the implementation/feature > to the README file [AVM] Ok , I will add these as expected as behavior to README. >> diff --git a/osaf/libs/agents/saf/lga/lga.h b/osaf/libs/agents/saf/lga/lga.h >> --- a/osaf/libs/agents/saf/lga/lga.h >> +++ b/osaf/libs/agents/saf/lga/lga.h >> @@ -77,6 +77,7 @@ typedef struct lga_client_hdl_rec { >> * Set when client is initialized an all >> * streams are recovered >> */ >> + bool is_stale_client; /* Status of client based on the CLM status of >> node.*/ >> } lga_client_hdl_rec_t; >> >> /* States of the server */ >> @@ -118,6 +119,7 @@ typedef struct { >> /* LGS LGA sync params */ >> int lgs_sync_awaited; >> NCS_SEL_OBJ lgs_sync_sel; >> + SaClmClusterChangesT clm_node_state; /*Reflects CLM status of >> this node(for future use).*/ > [Lennart] This type is defined in saClm.h but saClm.h is included indirectly > via ncsencdec_pub.h, saClm.h should be included here directly [AVM] Done > >> } lga_cb_t; >> >> /* lga_saf_api.c */ >> diff --git a/osaf/libs/agents/saf/lga/lga_api.c >> b/osaf/libs/agents/saf/lga/lga_api.c >> --- a/osaf/libs/agents/saf/lga/lga_api.c >> +++ b/osaf/libs/agents/saf/lga/lga_api.c >> @@ -324,6 +324,16 @@ SaAisErrorT saLogSelectionObjectGet(SaLo >> rc = SA_AIS_ERR_BAD_HANDLE; >> goto done; >> } >> + >> + osaf_mutex_lock_ordie(&lga_cb.cb_lock); >> + /*Check CLM membership of node.*/ >> + if (hdl_rec->is_stale_client == true) { >> + TRACE("Node not CLM member or stale client"); >> + ncshm_give_hdl(logHandle); >> + rc = SA_AIS_ERR_UNAVAILABLE; >> + goto done; >> + } >> + osaf_mutex_unlock_ordie(&lga_cb.cb_lock); >> >> /* Obtain the selection object from the IPC queue */ >> sel_obj = m_NCS_IPC_GET_SEL_OBJ(&hdl_rec->mbx); >> @@ -381,6 +391,16 @@ SaAisErrorT saLogDispatch(SaLogHandleT l >> goto done; >> } >> >> + osaf_mutex_lock_ordie(&lga_cb.cb_lock); >> + /*Check CLM membership of node.*/ >> + if (hdl_rec->is_stale_client == true) { >> + TRACE("Node not CLM member or stale client"); >> + ncshm_give_hdl(logHandle); >> + rc = SA_AIS_ERR_UNAVAILABLE; >> + goto done; >> + } >> + osaf_mutex_unlock_ordie(&lga_cb.cb_lock); > [Lennart] Is it nessecary to lock this whole section or is it only > is_stale_client that has to be protected? > If "goto done;" is executed the mutex will not be unlocked! Also see comment > for lgs_mdc.c (mutex not used). > This applies to all API:s checking is_stale_client [AVM] Corrected Unlock on go to done. in lga_mds.c lga_lgs_msg_proc() it self is protected with mutex_lock. >> + >> if ((rc = lga_hdl_cbk_dispatch(&lga_cb, hdl_rec, dispatchFlags)) != >> SA_AIS_OK) >> TRACE("LGA_DISPATCH_FAILURE"); >> >> @@ -777,6 +797,15 @@ SaAisErrorT saLogStreamOpen_2(SaLogHandl >> goto done; >> } >> >> + osaf_mutex_lock_ordie(&lga_cb.cb_lock); >> + /*Check CLM membership of node.*/ >> + if (hdl_rec->is_stale_client == true) { >> + TRACE("%s Node not CLM member or stale client", >> __FUNCTION__); >> + ais_rc = SA_AIS_ERR_UNAVAILABLE; >> + goto done_give_hdl; >> + } >> + osaf_mutex_unlock_ordie(&lga_cb.cb_lock); >> + >> /*** >> * Handle states >> * Synchronize with mds and recovery thread (mutex) >> @@ -1202,6 +1231,15 @@ SaAisErrorT saLogWriteLogAsync(SaLogStre >> goto done_give_hdl_stream; >> } >> >> + osaf_mutex_lock_ordie(&lga_cb.cb_lock); >> + /*Check CLM membership of node.*/ >> + if (hdl_rec->is_stale_client == true) { >> + TRACE("%s Node not CLM member or stale client", >> __FUNCTION__); >> + ais_rc = SA_AIS_ERR_UNAVAILABLE; >> + goto done_give_hdl_all; >> + } >> + osaf_mutex_unlock_ordie(&lga_cb.cb_lock); >> + >> if ((hdl_rec->reg_cbk.saLogWriteLogCallback == NULL) && (ackFlags >> == SA_LOG_RECORD_WRITE_ACK)) { >> TRACE("%s: Write Callback not registered", __FUNCTION__); >> ais_rc = SA_AIS_ERR_INIT; >> @@ -1350,6 +1388,15 @@ SaAisErrorT saLogStreamClose(SaLogStream >> goto done_give_hdl_stream; >> } >> >> + osaf_mutex_lock_ordie(&lga_cb.cb_lock); >> + /*Check CLM membership of node.*/ >> + if (hdl_rec->is_stale_client == true) { >> + TRACE("Node not CLM member or stale client"); >> + ais_rc = SA_AIS_ERR_UNAVAILABLE; >> + goto rmv_stream; >> + } >> + osaf_mutex_unlock_ordie(&lga_cb.cb_lock); >> + >> if (is_lga_state(LGA_NO_SERVER)) { >> /* No server is available. Remove the stream from client >> database. >> * Server side will manage to release resources of this stream >> when up. >> diff --git a/osaf/libs/agents/saf/lga/lga_mds.c >> b/osaf/libs/agents/saf/lga/lga_mds.c >> --- a/osaf/libs/agents/saf/lga/lga_mds.c >> +++ b/osaf/libs/agents/saf/lga/lga_mds.c >> @@ -476,6 +476,41 @@ static uint32_t lga_lgs_msg_proc(lga_cb_ >> } >> break; >> >> + case LGSV_CLM_NODE_STATUS_CALLBACK: >> + { >> + lga_client_hdl_rec_t *lga_hdl_rec; >> + >> + >> TRACE_2("LGSV_CLM_NODE_STATUS_CALLBACK >> clm_node_status:%d", >> + lgsv_msg- >>> info.cbk_info.clm_node_status_cbk.clm_node_status); >> + >> + /** Lookup the hdl rec by client_id **/ >> + if (NULL == (lga_hdl_rec = >> + >> lga_find_hdl_rec_by_regid(cb, lgsv_msg- >>> info.cbk_info.lgs_client_id))) { >> + TRACE("regid not found"); >> + lga_msg_destroy(lgsv_msg); >> + TRACE_LEAVE(); >> + return NCSCC_RC_FAILURE; >> + } >> + cb->clm_node_state = lgsv_msg- >>> info.cbk_info.clm_node_status_cbk.clm_node_status; >> + //A client becomes stale if Node loses CLM >> Membership. >> + if (cb->clm_node_state != >> SA_CLM_NODE_JOINED) { > [Lennart] clm_node_state not protected by mutex. See lga_util.c [AVM] In lga_mds.c lga_lgs_msg_proc() it self is protected with mutex_lock in function lga_mds_rcv(). > >> + /*If the node rejoins the cluster >> membership, processes executing on the node will be >> + able to reinitialize new library >> handles and use the entire set of Log Service APIs that >> + operate on these new handles; >> however, invocation of APIs that operate on handles >> + acquired by any process before the >> node left the membership will continue to fail with >> + SA_AIS_ERR_UNAVAILABLE (or with >> the special treatment described above for >> + asynchronous calls) with the >> exception of saLogFinalize(), which is used to free the >> + library handles and all resources >> associated with these handles. Hence, it is recommended >> + for the processes to finalize the >> library handles as soon as the processes >> + detect that the node left the >> membership.*/ >> + lga_hdl_rec->is_stale_client = true; > [Lennart] Not protected by mutex. See lga_ais.c [AVM] In lga_mds.c lga_lgs_msg_proc() it self is protected with mutex_lock in function lga_mds_rcv(). > >> + TRACE("CLM_NODE callback >> is_stale_client: %d clm_node_state: %d", >> + lga_hdl_rec- >>> is_stale_client, cb->clm_node_state); >> + } >> + lga_msg_destroy(lgsv_msg); >> + } >> + break; >> + >> default: >> TRACE("unknown type %d", lgsv_msg- >>> info.cbk_info.type); >> lga_msg_destroy(lgsv_msg); >> @@ -820,6 +855,35 @@ static uint32_t lga_dec_write_cbk_msg(NC >> } >> >> >> /********************************************************** >> ****************** >> +Name : lga_dec_clm_node_status_cbk_msg >> + >> +Description : This routine decodes message >> + >> +Arguments : NCS_UBAID *msg, >> +LGSV_MSG *msg >> + >> +Return Values : uint32_t >> + >> +Notes : None. >> + >> ********************************************************** >> ********************/ >> +static uint32_t lga_dec_clm_node_status_cbk_msg(NCS_UBAID *uba, >> lgsv_msg_t *msg) >> +{ >> + uint8_t *p8; >> + uint32_t total_bytes = 0; >> + logsv_lga_clm_status_cbk_t *param = &msg- >>> info.cbk_info.clm_node_status_cbk; >> + uint8_t local_data[100]; >> + >> + osafassert(uba != NULL); >> + >> + p8 = ncs_dec_flatten_space(uba, local_data, 4); >> + param->clm_node_status = ncs_decode_32bit(&p8); >> + ncs_dec_skip_space(uba, 4); >> + total_bytes += 4; >> + >> + return total_bytes; >> +} >> + >> +/********************************************************* >> ******************* >> Name : lga_dec_lstr_open_sync_rsp_msg >> >> Description : This routine decodes a log stream open sync response >> message >> @@ -936,6 +1000,11 @@ static uint32_t lga_mds_dec(struct ncsmd >> msg->info.cbk_info.lgs_client_id); >> total_bytes += lga_dec_write_cbk_msg(uba, >> msg); >> break; >> + case LGSV_CLM_NODE_STATUS_CALLBACK: >> + TRACE_2("decode clm node status message, >> lgs_client_id=%d", >> + msg- >>> info.cbk_info.lgs_client_id); >> + total_bytes += >> lga_dec_clm_node_status_cbk_msg(uba, msg); >> + break; >> default: >> TRACE_2("Unknown callback type = %d!", >> msg->info.cbk_info.type); >> break; >> diff --git a/osaf/libs/agents/saf/lga/lga_util.c >> b/osaf/libs/agents/saf/lga/lga_util.c >> --- a/osaf/libs/agents/saf/lga/lga_util.c >> +++ b/osaf/libs/agents/saf/lga/lga_util.c >> @@ -53,6 +53,7 @@ static unsigned int lga_create(void) >> >> osaf_mutex_lock_ordie(&lga_cb.cb_lock); >> lga_cb.lgs_sync_awaited = 0; >> + lga_cb.clm_node_state = SA_CLM_NODE_JOINED; > [Lennart] Protected by mutex here but not in lga_mds.c [AVM] In lga_mds.c lga_lgs_msg_proc() it self is protected with mutex_lock in function lga_mds_rcv(). > >> osaf_mutex_unlock_ordie(&lga_cb.cb_lock); >> >> /* No longer needed */ >> @@ -702,6 +703,7 @@ lga_client_hdl_rec_t *lga_hdl_rec_add(lg >> **/ >> rec->lgs_client_id = client_id; >> >> + rec->is_stale_client = false; >> /*** >> * Initiate the recovery flags >> * The setting means that the client is initialized and that there is >> diff --git a/osaf/libs/common/logsv/include/lgsv_defs.h >> b/osaf/libs/common/logsv/include/lgsv_defs.h >> --- a/osaf/libs/common/logsv/include/lgsv_defs.h >> +++ b/osaf/libs/common/logsv/include/lgsv_defs.h >> @@ -20,7 +20,11 @@ >> >> #define LOG_RELEASE_CODE 'A' >> #define LOG_MAJOR_VERSION 2 >> -#define LOG_MINOR_VERSION 1 >> +#define LOG_MINOR_VERSION 2 >> + >> +#define LOG_RELEASE_CODE_0 'A' >> +#define LOG_MAJOR_VERSION_0 2 >> +#define LOG_MINOR_VERSION_0 1 >> >> // Waiting time in library for sync send, unit 10ms >> #define LGS_WAIT_TIME 1000 >> diff --git a/osaf/libs/common/logsv/include/lgsv_msg.h >> b/osaf/libs/common/logsv/include/lgsv_msg.h >> --- a/osaf/libs/common/logsv/include/lgsv_msg.h >> +++ b/osaf/libs/common/logsv/include/lgsv_msg.h >> @@ -45,6 +45,7 @@ typedef enum { >> /* LGSV Callback enums */ >> typedef enum { >> LGSV_WRITE_LOG_CALLBACK_IND = 0, >> + LGSV_CLM_NODE_STATUS_CALLBACK = 1, >> LGSV_LGS_CBK_MAX >> } lgsv_cbk_msg_type_t; >> >> @@ -121,12 +122,20 @@ typedef struct { >> SaAisErrorT error; >> } lgsv_write_log_callback_ind_t; >> >> -/* wrapper structure for all the callbacks */ >> +/*CLM node status callback structure for LGS*/ >> +typedef struct logsv_loga_clm_status_param_tag { >> + uint32_t clm_node_status; >> +} logsv_lga_clm_status_cbk_t; >> + >> +/* wrapper structure for all the callbacks */ >> typedef struct { >> - lgsv_cbk_msg_type_t type; /* callback type */ >> - uint32_t lgs_client_id; /* lgs client_id */ >> - SaInvocationT inv; /* invocation value */ >> + lgsv_cbk_msg_type_t type; /* callback type */ >> + uint32_t lgs_client_id; /* lgs client_id */ >> + SaInvocationT inv; /* invocation value */ >> + /* union {*/ >> lgsv_write_log_callback_ind_t write_cbk; >> + logsv_lga_clm_status_cbk_t clm_node_status_cbk; >> + /* } param; */ >> } lgsv_cbk_info_t; >> >> /* API Response parameter definitions */ ------------------------------------------------------------------------------ _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel