Hi My comments [Lennart]
I will continue reviewing patch 2 Thanks Lennart > -----Original Message----- > From: mahesh.va...@oracle.com [mailto:mahesh.va...@oracle.com] > Sent: den 2 augusti 2016 07:18 > To: Vu Minh Nguyen <vu.m.ngu...@dektech.com.au>; Lennart Lund > <lennart.l...@ericsson.com>; Anders Widell <anders.wid...@ericsson.com> > Cc: opensaf-devel@lists.sourceforge.net > Subject: [PATCH 1 of 3] lga: agent Cluster Membership (CLM) integration > [#1638] V2 > > osaf/libs/agents/saf/lga/lga.h | 2 + > osaf/libs/agents/saf/lga/lga_api.c | 47 ++++++++++++++++++++ > osaf/libs/agents/saf/lga/lga_mds.c | 69 > ++++++++++++++++++++++++++++++ > osaf/libs/agents/saf/lga/lga_util.c | 2 + > osaf/libs/common/logsv/include/lgsv_defs.h | 6 ++- > osaf/libs/common/logsv/include/lgsv_msg.h | 17 +++++- > 6 files changed, 138 insertions(+), 5 deletions(-) > > > V2 patch: > > Incorporated Anders Widell and Vu review comments , > for more details see the review comments posted on V1 patch. > Description: > > Form CLM integration is supported from Log Service A.02.02. > > At-least a A.02.02 LGA client will check CLM membership status of client's > node. > old LGA clients A.02.01 are always clm member. > > This patch enhanced the log service for Unavailability of the Log Service API > on a > Non-Member Node which will fail with SA_AIS_ERR_UNAVAILABLE. > > After this patch the Log Service does not provide service to processes on > cluster nodes that are not > in the cluster membership. > > If the node rejoins the cluster membership, processes executing on the > node will be > able to reinitialize new library handles and use the entire set of Log Service > APIs that > operate on these new handles; however, invocation of APIs that operate on > handles > acquired by any process before the node left the membership will continue > to fail with > SA_AIS_ERR_UNAVAILABLE (or with the special treatment described above > for > asynchronous calls) with the exception of saLogFinalize(), which is used to > free the > library handles and all resources associated with these handles. Hence, it is > recommended > for the processes to finalize the library handles as soon as the processes > detect that the node left the membership. > > Detailed README will be provide soon. > > Following are expected Log Service API behavior : > > Case1: On Non-Member Node, Log Service API will fail with code > SA_AIS_ERR_UNAVAILABLE (31) > Case2: On Member Node after recovered from Non-Member Node, Log > Service API will fail with code SA_AIS_ERR_UNAVAILABLE (31) > Case3: Non-Member Node + (Headless) Log Service API will fail with code > SA_AIS_ERR_UNAVAILABLE (31) > Case4: On Non-Member Node + (Headless) + (Head Joined) Log Service API > will fail with code SA_AIS_ERR_UNAVAILABLE (31) [Lennart] I assume you will move this info about the implementation/feature to the README file > > diff --git a/osaf/libs/agents/saf/lga/lga.h b/osaf/libs/agents/saf/lga/lga.h > --- a/osaf/libs/agents/saf/lga/lga.h > +++ b/osaf/libs/agents/saf/lga/lga.h > @@ -77,6 +77,7 @@ typedef struct lga_client_hdl_rec { > * Set when client is initialized an all > * streams are recovered > */ > + bool is_stale_client; /* Status of client based on the CLM status of > node.*/ > } lga_client_hdl_rec_t; > > /* States of the server */ > @@ -118,6 +119,7 @@ typedef struct { > /* LGS LGA sync params */ > int lgs_sync_awaited; > NCS_SEL_OBJ lgs_sync_sel; > + SaClmClusterChangesT clm_node_state; /*Reflects CLM status of > this node(for future use).*/ [Lennart] This type is defined in saClm.h but saClm.h is included indirectly via ncsencdec_pub.h, saClm.h should be included here directly > } lga_cb_t; > > /* lga_saf_api.c */ > diff --git a/osaf/libs/agents/saf/lga/lga_api.c > b/osaf/libs/agents/saf/lga/lga_api.c > --- a/osaf/libs/agents/saf/lga/lga_api.c > +++ b/osaf/libs/agents/saf/lga/lga_api.c > @@ -324,6 +324,16 @@ SaAisErrorT saLogSelectionObjectGet(SaLo > rc = SA_AIS_ERR_BAD_HANDLE; > goto done; > } > + > + osaf_mutex_lock_ordie(&lga_cb.cb_lock); > + /*Check CLM membership of node.*/ > + if (hdl_rec->is_stale_client == true) { > + TRACE("Node not CLM member or stale client"); > + ncshm_give_hdl(logHandle); > + rc = SA_AIS_ERR_UNAVAILABLE; > + goto done; > + } > + osaf_mutex_unlock_ordie(&lga_cb.cb_lock); > > /* Obtain the selection object from the IPC queue */ > sel_obj = m_NCS_IPC_GET_SEL_OBJ(&hdl_rec->mbx); > @@ -381,6 +391,16 @@ SaAisErrorT saLogDispatch(SaLogHandleT l > goto done; > } > > + osaf_mutex_lock_ordie(&lga_cb.cb_lock); > + /*Check CLM membership of node.*/ > + if (hdl_rec->is_stale_client == true) { > + TRACE("Node not CLM member or stale client"); > + ncshm_give_hdl(logHandle); > + rc = SA_AIS_ERR_UNAVAILABLE; > + goto done; > + } > + osaf_mutex_unlock_ordie(&lga_cb.cb_lock); [Lennart] Is it nessecary to lock this whole section or is it only is_stale_client that has to be protected? If "goto done;" is executed the mutex will not be unlocked! Also see comment for lgs_mdc.c (mutex not used). This applies to all API:s checking is_stale_client > + > if ((rc = lga_hdl_cbk_dispatch(&lga_cb, hdl_rec, dispatchFlags)) != > SA_AIS_OK) > TRACE("LGA_DISPATCH_FAILURE"); > > @@ -777,6 +797,15 @@ SaAisErrorT saLogStreamOpen_2(SaLogHandl > goto done; > } > > + osaf_mutex_lock_ordie(&lga_cb.cb_lock); > + /*Check CLM membership of node.*/ > + if (hdl_rec->is_stale_client == true) { > + TRACE("%s Node not CLM member or stale client", > __FUNCTION__); > + ais_rc = SA_AIS_ERR_UNAVAILABLE; > + goto done_give_hdl; > + } > + osaf_mutex_unlock_ordie(&lga_cb.cb_lock); > + > /*** > * Handle states > * Synchronize with mds and recovery thread (mutex) > @@ -1202,6 +1231,15 @@ SaAisErrorT saLogWriteLogAsync(SaLogStre > goto done_give_hdl_stream; > } > > + osaf_mutex_lock_ordie(&lga_cb.cb_lock); > + /*Check CLM membership of node.*/ > + if (hdl_rec->is_stale_client == true) { > + TRACE("%s Node not CLM member or stale client", > __FUNCTION__); > + ais_rc = SA_AIS_ERR_UNAVAILABLE; > + goto done_give_hdl_all; > + } > + osaf_mutex_unlock_ordie(&lga_cb.cb_lock); > + > if ((hdl_rec->reg_cbk.saLogWriteLogCallback == NULL) && (ackFlags > == SA_LOG_RECORD_WRITE_ACK)) { > TRACE("%s: Write Callback not registered", __FUNCTION__); > ais_rc = SA_AIS_ERR_INIT; > @@ -1350,6 +1388,15 @@ SaAisErrorT saLogStreamClose(SaLogStream > goto done_give_hdl_stream; > } > > + osaf_mutex_lock_ordie(&lga_cb.cb_lock); > + /*Check CLM membership of node.*/ > + if (hdl_rec->is_stale_client == true) { > + TRACE("Node not CLM member or stale client"); > + ais_rc = SA_AIS_ERR_UNAVAILABLE; > + goto rmv_stream; > + } > + osaf_mutex_unlock_ordie(&lga_cb.cb_lock); > + > if (is_lga_state(LGA_NO_SERVER)) { > /* No server is available. Remove the stream from client > database. > * Server side will manage to release resources of this stream > when up. > diff --git a/osaf/libs/agents/saf/lga/lga_mds.c > b/osaf/libs/agents/saf/lga/lga_mds.c > --- a/osaf/libs/agents/saf/lga/lga_mds.c > +++ b/osaf/libs/agents/saf/lga/lga_mds.c > @@ -476,6 +476,41 @@ static uint32_t lga_lgs_msg_proc(lga_cb_ > } > break; > > + case LGSV_CLM_NODE_STATUS_CALLBACK: > + { > + lga_client_hdl_rec_t *lga_hdl_rec; > + > + > TRACE_2("LGSV_CLM_NODE_STATUS_CALLBACK > clm_node_status:%d", > + lgsv_msg- > >info.cbk_info.clm_node_status_cbk.clm_node_status); > + > + /** Lookup the hdl rec by client_id **/ > + if (NULL == (lga_hdl_rec = > + > lga_find_hdl_rec_by_regid(cb, lgsv_msg- > >info.cbk_info.lgs_client_id))) { > + TRACE("regid not found"); > + lga_msg_destroy(lgsv_msg); > + TRACE_LEAVE(); > + return NCSCC_RC_FAILURE; > + } > + cb->clm_node_state = lgsv_msg- > >info.cbk_info.clm_node_status_cbk.clm_node_status; > + //A client becomes stale if Node loses CLM > Membership. > + if (cb->clm_node_state != > SA_CLM_NODE_JOINED) { [Lennart] clm_node_state not protected by mutex. See lga_util.c > + /*If the node rejoins the cluster > membership, processes executing on the node will be > + able to reinitialize new library > handles and use the entire set of Log Service APIs that > + operate on these new handles; > however, invocation of APIs that operate on handles > + acquired by any process before the > node left the membership will continue to fail with > + SA_AIS_ERR_UNAVAILABLE (or with > the special treatment described above for > + asynchronous calls) with the > exception of saLogFinalize(), which is used to free the > + library handles and all resources > associated with these handles. Hence, it is recommended > + for the processes to finalize the > library handles as soon as the processes > + detect that the node left the > membership.*/ > + lga_hdl_rec->is_stale_client = true; [Lennart] Not protected by mutex. See lga_ais.c > + TRACE("CLM_NODE callback > is_stale_client: %d clm_node_state: %d", > + lga_hdl_rec- > >is_stale_client, cb->clm_node_state); > + } > + lga_msg_destroy(lgsv_msg); > + } > + break; > + > default: > TRACE("unknown type %d", lgsv_msg- > >info.cbk_info.type); > lga_msg_destroy(lgsv_msg); > @@ -820,6 +855,35 @@ static uint32_t lga_dec_write_cbk_msg(NC > } > > > /********************************************************** > ****************** > +Name : lga_dec_clm_node_status_cbk_msg > + > +Description : This routine decodes message > + > +Arguments : NCS_UBAID *msg, > +LGSV_MSG *msg > + > +Return Values : uint32_t > + > +Notes : None. > + > ********************************************************** > ********************/ > +static uint32_t lga_dec_clm_node_status_cbk_msg(NCS_UBAID *uba, > lgsv_msg_t *msg) > +{ > + uint8_t *p8; > + uint32_t total_bytes = 0; > + logsv_lga_clm_status_cbk_t *param = &msg- > >info.cbk_info.clm_node_status_cbk; > + uint8_t local_data[100]; > + > + osafassert(uba != NULL); > + > + p8 = ncs_dec_flatten_space(uba, local_data, 4); > + param->clm_node_status = ncs_decode_32bit(&p8); > + ncs_dec_skip_space(uba, 4); > + total_bytes += 4; > + > + return total_bytes; > +} > + > +/********************************************************* > ******************* > Name : lga_dec_lstr_open_sync_rsp_msg > > Description : This routine decodes a log stream open sync response > message > @@ -936,6 +1000,11 @@ static uint32_t lga_mds_dec(struct ncsmd > msg->info.cbk_info.lgs_client_id); > total_bytes += lga_dec_write_cbk_msg(uba, > msg); > break; > + case LGSV_CLM_NODE_STATUS_CALLBACK: > + TRACE_2("decode clm node status message, > lgs_client_id=%d", > + msg- > >info.cbk_info.lgs_client_id); > + total_bytes += > lga_dec_clm_node_status_cbk_msg(uba, msg); > + break; > default: > TRACE_2("Unknown callback type = %d!", > msg->info.cbk_info.type); > break; > diff --git a/osaf/libs/agents/saf/lga/lga_util.c > b/osaf/libs/agents/saf/lga/lga_util.c > --- a/osaf/libs/agents/saf/lga/lga_util.c > +++ b/osaf/libs/agents/saf/lga/lga_util.c > @@ -53,6 +53,7 @@ static unsigned int lga_create(void) > > osaf_mutex_lock_ordie(&lga_cb.cb_lock); > lga_cb.lgs_sync_awaited = 0; > + lga_cb.clm_node_state = SA_CLM_NODE_JOINED; [Lennart] Protected by mutex here but not in lga_mds.c > osaf_mutex_unlock_ordie(&lga_cb.cb_lock); > > /* No longer needed */ > @@ -702,6 +703,7 @@ lga_client_hdl_rec_t *lga_hdl_rec_add(lg > **/ > rec->lgs_client_id = client_id; > > + rec->is_stale_client = false; > /*** > * Initiate the recovery flags > * The setting means that the client is initialized and that there is > diff --git a/osaf/libs/common/logsv/include/lgsv_defs.h > b/osaf/libs/common/logsv/include/lgsv_defs.h > --- a/osaf/libs/common/logsv/include/lgsv_defs.h > +++ b/osaf/libs/common/logsv/include/lgsv_defs.h > @@ -20,7 +20,11 @@ > > #define LOG_RELEASE_CODE 'A' > #define LOG_MAJOR_VERSION 2 > -#define LOG_MINOR_VERSION 1 > +#define LOG_MINOR_VERSION 2 > + > +#define LOG_RELEASE_CODE_0 'A' > +#define LOG_MAJOR_VERSION_0 2 > +#define LOG_MINOR_VERSION_0 1 > > // Waiting time in library for sync send, unit 10ms > #define LGS_WAIT_TIME 1000 > diff --git a/osaf/libs/common/logsv/include/lgsv_msg.h > b/osaf/libs/common/logsv/include/lgsv_msg.h > --- a/osaf/libs/common/logsv/include/lgsv_msg.h > +++ b/osaf/libs/common/logsv/include/lgsv_msg.h > @@ -45,6 +45,7 @@ typedef enum { > /* LGSV Callback enums */ > typedef enum { > LGSV_WRITE_LOG_CALLBACK_IND = 0, > + LGSV_CLM_NODE_STATUS_CALLBACK = 1, > LGSV_LGS_CBK_MAX > } lgsv_cbk_msg_type_t; > > @@ -121,12 +122,20 @@ typedef struct { > SaAisErrorT error; > } lgsv_write_log_callback_ind_t; > > -/* wrapper structure for all the callbacks */ > +/*CLM node status callback structure for LGS*/ > +typedef struct logsv_loga_clm_status_param_tag { > + uint32_t clm_node_status; > +} logsv_lga_clm_status_cbk_t; > + > +/* wrapper structure for all the callbacks */ > typedef struct { > - lgsv_cbk_msg_type_t type; /* callback type */ > - uint32_t lgs_client_id; /* lgs client_id */ > - SaInvocationT inv; /* invocation value */ > + lgsv_cbk_msg_type_t type; /* callback type */ > + uint32_t lgs_client_id; /* lgs client_id */ > + SaInvocationT inv; /* invocation value */ > + /* union {*/ > lgsv_write_log_callback_ind_t write_cbk; > + logsv_lga_clm_status_cbk_t clm_node_status_cbk; > + /* } param; */ > } lgsv_cbk_info_t; > > /* API Response parameter definitions */ ------------------------------------------------------------------------------ _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel