Hi Vu, Thank you very much for your comment. I will get back to you after reviewing each comment.
-AVM On 7/29/2016 9:20 AM, Vu Minh Nguyen wrote: > Hi Mahesh, > > I have few comments, started with [Vu]: > > General: some places use tabs, some other places use spaces. > > Regards, Vu > >> -----Original Message----- >> From: [email protected] [mailto:[email protected]] >> Sent: Tuesday, July 26, 2016 3:43 PM >> To: [email protected]; [email protected] >> Cc: [email protected] >> Subject: [PATCH 1 of 3] lga: agent Cluster Membership (CLM) integration >> [#1638] V1 >> >> osaf/libs/agents/saf/lga/lga.h | 2 + >> osaf/libs/agents/saf/lga/lga_api.c | 37 ++++++++++++++ >> osaf/libs/agents/saf/lga/lga_mds.c | 75 >> ++++++++++++++++++++++++++++++ >> osaf/libs/agents/saf/lga/lga_util.c | 2 + >> osaf/libs/common/logsv/include/lgsv_defs.h | 6 ++- >> osaf/libs/common/logsv/include/lgsv_msg.h | 17 +++++- >> 6 files changed, 134 insertions(+), 5 deletions(-) >> >> >> Description: >> >> Form CLM integration is supported from Log Service A.02.02. >> >> At-least a A.02.02 LGA client will check CLM membership status of client's >> node. >> old LGA clients A.02.01 are always clm member. >> >> This patch enhanced the log service for Unavailability of the Log Service > API >> on a >> Non-Member Node which will fail with SA_AIS_ERR_UNAVAILABLE. >> >> After this patch the Log Service does not provide service to processes on >> cluster nodes that are not >> in the cluster membership. >> >> If the node rejoins the cluster membership, processes executing on the > node >> will be >> able to reinitialize new library handles and use the entire set of Log > Service >> APIs that >> operate on these new handles; however, invocation of APIs that operate on >> handles >> acquired by any process before the node left the membership will continue >> to fail with >> SA_AIS_ERR_UNAVAILABLE (or with the special treatment described above >> for >> asynchronous calls) with the exception of saLogFinalize(), which is used > to >> free the >> library handles and all resources associated with these handles. Hence, it > is >> recommended >> for the processes to finalize the library handles as soon as the processes >> detect that the node left the membership. >> >> Detailed README will be provide soon. >> >> Following are expected Log Service API behavior : >> >> Case1: On Non-Member Node, Log Service API will fail with code >> SA_AIS_ERR_UNAVAILABLE (31) >> Case2: On Member Node after recovered from Non-Member Node, Log >> Service API will fail with code SA_AIS_ERR_UNAVAILABLE (31) >> Case3: Non-Member Node + (Headless) Log Service API will fail with code >> SA_AIS_ERR_UNAVAILABLE (31) >> Case4: On Non-Member Node + (Headless) + (Head Joined) Log Service API >> will fail with code SA_AIS_ERR_UNAVAILABLE (31) >> >> diff --git a/osaf/libs/agents/saf/lga/lga.h > b/osaf/libs/agents/saf/lga/lga.h >> --- a/osaf/libs/agents/saf/lga/lga.h >> +++ b/osaf/libs/agents/saf/lga/lga.h >> @@ -77,6 +77,7 @@ typedef struct lga_client_hdl_rec { >> * Set when client is initialized an all >> * streams are recovered >> */ >> + bool is_stale_client; /* Status of client based on the CLM status > of >> node.*/ >> } lga_client_hdl_rec_t; >> >> /* States of the server */ >> @@ -118,6 +119,7 @@ typedef struct { >> /* LGS LGA sync params */ >> int lgs_sync_awaited; >> NCS_SEL_OBJ lgs_sync_sel; >> + SaClmClusterChangesT clm_node_state; /*Reflects CLM status of >> this node(for future use).*/ >> } lga_cb_t; >> >> /* lga_saf_api.c */ >> diff --git a/osaf/libs/agents/saf/lga/lga_api.c >> b/osaf/libs/agents/saf/lga/lga_api.c >> --- a/osaf/libs/agents/saf/lga/lga_api.c >> +++ b/osaf/libs/agents/saf/lga/lga_api.c >> @@ -325,6 +325,14 @@ SaAisErrorT saLogSelectionObjectGet(SaLo >> goto done; >> } >> >> + /*Check CLM membership of node.*/ >> + if (hdl_rec->is_stale_client == true) { >> + TRACE("Node not CLM member or stale client"); >> + ncshm_give_hdl(logHandle); >> + rc = SA_AIS_ERR_UNAVAILABLE; >> + goto done; >> + } >> + >> /* Obtain the selection object from the IPC queue */ >> sel_obj = m_NCS_IPC_GET_SEL_OBJ(&hdl_rec->mbx); >> >> @@ -381,6 +389,14 @@ SaAisErrorT saLogDispatch(SaLogHandleT l >> goto done; >> } >> >> + /*Check CLM membership of node.*/ >> + if (hdl_rec->is_stale_client == true) { >> + TRACE("Node not CLM member or stale client"); >> + ncshm_give_hdl(logHandle); >> + rc = SA_AIS_ERR_UNAVAILABLE; >> + goto done; >> + } >> + >> if ((rc = lga_hdl_cbk_dispatch(&lga_cb, hdl_rec, dispatchFlags)) != >> SA_AIS_OK) >> TRACE("LGA_DISPATCH_FAILURE"); >> >> @@ -776,6 +792,13 @@ SaAisErrorT saLogStreamOpen_2(SaLogHandl >> ais_rc = SA_AIS_ERR_BAD_HANDLE; >> goto done; >> } >> + >> + /*Check CLM membership of node.*/ >> + if (hdl_rec->is_stale_client == true) { >> + TRACE("%s Node not CLM member or stale client", >> __FUNCTION__); >> + ais_rc = SA_AIS_ERR_UNAVAILABLE; >> + goto done_give_hdl; >> + } >> >> /*** >> * Handle states >> @@ -1202,6 +1225,13 @@ SaAisErrorT saLogWriteLogAsync(SaLogStre >> goto done_give_hdl_stream; >> } >> >> + /*Check CLM membership of node.*/ >> + if (hdl_rec->is_stale_client == true) { >> + TRACE("%s Node not CLM member or stale client", >> __FUNCTION__); >> + ais_rc = SA_AIS_ERR_UNAVAILABLE; >> + goto done_give_hdl_all; >> + } > [Vu] The variable `is_state_client` is set in the MDS thread and referred to > in LOG API calling thread. So, seems there is an timing issue here. > E.g: > saLogWriteLogAsync() passes above CLM membership check but after that, right > before sending message to MDS layer, > the process receive `node leave` event. If this is the case, > saLogWriteLogAsync() returns OK. > >> + >> if ((hdl_rec->reg_cbk.saLogWriteLogCallback == NULL) && (ackFlags >> == SA_LOG_RECORD_WRITE_ACK)) { >> TRACE("%s: Write Callback not registered", __FUNCTION__); >> ais_rc = SA_AIS_ERR_INIT; >> @@ -1350,6 +1380,13 @@ SaAisErrorT saLogStreamClose(SaLogStream >> goto done_give_hdl_stream; >> } >> >> + /*Check CLM membership of node.*/ >> + if (hdl_rec->is_stale_client == true) { >> + TRACE("Node not CLM member or stale client"); >> + ais_rc = SA_AIS_ERR_UNAVAILABLE; >> + goto rmv_stream; >> + } >> + >> if (is_lga_state(LGA_NO_SERVER)) { >> /* No server is available. Remove the stream from client >> database. >> * Server side will manage to release resources of this > stream >> when up. >> diff --git a/osaf/libs/agents/saf/lga/lga_mds.c >> b/osaf/libs/agents/saf/lga/lga_mds.c >> --- a/osaf/libs/agents/saf/lga/lga_mds.c >> +++ b/osaf/libs/agents/saf/lga/lga_mds.c >> @@ -476,6 +476,47 @@ static uint32_t lga_lgs_msg_proc(lga_cb_ >> } >> break; >> >> + case LGSV_CLM_NODE_STATUS_CALLBACK: >> + { >> + lga_client_hdl_rec_t *lga_hdl_rec; >> + >> + >> TRACE_2("LGSV_CLM_NODE_STATUS_CALLBACK >> clm_node_status:%d", >> + lgsv_msg- >>> info.cbk_info.clm_node_status_cbk.clm_node_status); >> + >> + /** Create the chan hdl record here before >> + ** queing this message onto the priority >> queue >> + ** so that the dispatch by the application > to >> fetch >> + ** the callback is instantaneous. >> + **/ > [Vu] These comments is not relevant to below code. Should remove it. >> + >> + /** Lookup the hdl rec by client_id **/ >> + if (NULL == (lga_hdl_rec = >> + >> lga_find_hdl_rec_by_regid(cb, lgsv_msg- >>> info.cbk_info.lgs_client_id))) { >> + TRACE("regid not found"); >> + lga_msg_destroy(lgsv_msg); >> + TRACE_LEAVE(); >> + return NCSCC_RC_FAILURE; >> + } > [Vu] Node leave is an event at node level. I don't think above check is > necessary or at least not returning ERR, but keep going to next check. > And as the event is at node level, consider to put `clm_node_state` and ` > is_stale_client` to an global data type. > >> + cb->clm_node_state = lgsv_msg- >>> info.cbk_info.clm_node_status_cbk.clm_node_status; >> + //A client becomes stale if Node loses > CLM Membership. >> + if (cb->clm_node_state != > SA_CLM_NODE_JOINED) { >> + /*If the node rejoins the cluster >> membership, processes executing on the node will be >> + able to reinitialize new library >> handles and use the entire set of Log Service APIs that >> + operate on these new handles; >> however, invocation of APIs that operate on handles >> + acquired by any process before the >> node left the membership will continue to fail with >> + SA_AIS_ERR_UNAVAILABLE (or with >> the special treatment described above for >> + asynchronous calls) with the >> exception of saLogFinalize(), which is used to free the >> + library handles and all resources >> associated with these handles. Hence, it is recommended >> + for the processes to finalize the >> library handles as soon as the processes >> + detect that the node left the >> membership.*/ >> + lga_hdl_rec->is_stale_client = true; >> + TRACE("CLM_NODE callback > is_stale_client: %d >> clm_node_state: %d", >> + > lga_hdl_rec->is_stale_client, cb- >>> clm_node_state); >> + } >> + lga_msg_destroy(lgsv_msg); >> + } >> + break; >> + >> default: >> TRACE("unknown type %d", lgsv_msg- >>> info.cbk_info.type); >> lga_msg_destroy(lgsv_msg); >> @@ -820,6 +861,35 @@ static uint32_t lga_dec_write_cbk_msg(NC >> } >> >> >> /************************************************************* >> *************** >> + Name : lga_dec_clm_node_status_cbk_msg >> + >> + Description : This routine decodes message >> + >> + Arguments : NCS_UBAID *msg, >> + LGSV_MSG *msg >> + >> + Return Values : uint32_t >> + >> + Notes : None. >> +************************************************************* >> *****************/ >> +static uint32_t lga_dec_clm_node_status_cbk_msg(NCS_UBAID *uba, >> lgsv_msg_t *msg) >> +{ >> + uint8_t *p8; >> + uint32_t total_bytes = 0; >> + logsv_lga_clm_status_cbk_t *param = &msg- >>> info.cbk_info.clm_node_status_cbk; >> + uint8_t local_data[100]; >> + >> + osafassert(uba != NULL); >> + >> + p8 = ncs_dec_flatten_space(uba, local_data, 4); >> + param->clm_node_status = ncs_decode_32bit(&p8); >> + ncs_dec_skip_space(uba, 4); >> + total_bytes += 4; >> + >> + return total_bytes; >> +} >> + >> +/************************************************************ >> **************** >> Name : lga_dec_lstr_open_sync_rsp_msg >> >> Description : This routine decodes a log stream open sync response >> message >> @@ -936,6 +1006,11 @@ static uint32_t lga_mds_dec(struct ncsmd >> msg->info.cbk_info.lgs_client_id); >> total_bytes += lga_dec_write_cbk_msg(uba, >> msg); >> break; >> + case LGSV_CLM_NODE_STATUS_CALLBACK: >> + TRACE_2("decode clm node status message, >> lgs_client_id=%d", >> + msg- >>> info.cbk_info.lgs_client_id); >> + total_bytes += >> lga_dec_clm_node_status_cbk_msg(uba, msg); >> + break; >> default: >> TRACE_2("Unknown callback type = %d!", >> msg->info.cbk_info.type); >> break; >> diff --git a/osaf/libs/agents/saf/lga/lga_util.c >> b/osaf/libs/agents/saf/lga/lga_util.c >> --- a/osaf/libs/agents/saf/lga/lga_util.c >> +++ b/osaf/libs/agents/saf/lga/lga_util.c >> @@ -53,6 +53,7 @@ static unsigned int lga_create(void) >> >> osaf_mutex_lock_ordie(&lga_cb.cb_lock); >> lga_cb.lgs_sync_awaited = 0; >> + lga_cb.clm_node_state = SA_CLM_NODE_JOINED; >> osaf_mutex_unlock_ordie(&lga_cb.cb_lock); >> >> /* No longer needed */ >> @@ -702,6 +703,7 @@ lga_client_hdl_rec_t *lga_hdl_rec_add(lg >> **/ >> rec->lgs_client_id = client_id; >> >> + rec->is_stale_client = false; >> /*** >> * Initiate the recovery flags >> * The setting means that the client is initialized and that there > is >> diff --git a/osaf/libs/common/logsv/include/lgsv_defs.h >> b/osaf/libs/common/logsv/include/lgsv_defs.h >> --- a/osaf/libs/common/logsv/include/lgsv_defs.h >> +++ b/osaf/libs/common/logsv/include/lgsv_defs.h >> @@ -20,7 +20,11 @@ >> >> #define LOG_RELEASE_CODE 'A' >> #define LOG_MAJOR_VERSION 2 >> -#define LOG_MINOR_VERSION 1 >> +#define LOG_MINOR_VERSION 2 >> + >> +#define LOG_RELEASE_CODE_0 'A' >> +#define LOG_MAJOR_VERSION_0 2 >> +#define LOG_MINOR_VERSION_0 1 > [Vu] These three macros are only used in ` is_client_clm_member()`. Consider > to remove them, but use internal variable SaVersionT instead. >> // Waiting time in library for sync send, unit 10ms >> #define LGS_WAIT_TIME 1000 >> diff --git a/osaf/libs/common/logsv/include/lgsv_msg.h >> b/osaf/libs/common/logsv/include/lgsv_msg.h >> --- a/osaf/libs/common/logsv/include/lgsv_msg.h >> +++ b/osaf/libs/common/logsv/include/lgsv_msg.h >> @@ -45,6 +45,7 @@ typedef enum { >> /* LGSV Callback enums */ >> typedef enum { >> LGSV_WRITE_LOG_CALLBACK_IND = 0, >> + LGSV_CLM_NODE_STATUS_CALLBACK = 1, >> LGSV_LGS_CBK_MAX >> } lgsv_cbk_msg_type_t; >> >> @@ -121,12 +122,20 @@ typedef struct { >> SaAisErrorT error; >> } lgsv_write_log_callback_ind_t; >> >> -/* wrapper structure for all the callbacks */ >> +/*CLM node status callback structure for LGS*/ >> +typedef struct logsv_loga_clm_status_param_tag { >> + uint32_t clm_node_status; >> +} logsv_lga_clm_status_cbk_t; >> + >> + >> typedef struct { >> - lgsv_cbk_msg_type_t type; /* callback type */ >> - uint32_t lgs_client_id; /* lgs client_id */ >> - SaInvocationT inv; /* invocation value */ >> + lgsv_cbk_msg_type_t type; /* callback type */ >> + uint32_t lgs_client_id; /* lgs client_id */ >> + SaInvocationT inv; /* invocation value */ >> + /* union {*/ >> lgsv_write_log_callback_ind_t write_cbk; >> + logsv_lga_clm_status_cbk_t clm_node_status_cbk; >> + /* } param; */ > [Vu] Remove 02 above comments. >> } lgsv_cbk_info_t; >> >> /* API Response parameter definitions */ ------------------------------------------------------------------------------ _______________________________________________ Opensaf-devel mailing list [email protected] https://lists.sourceforge.net/lists/listinfo/opensaf-devel
