osaf/libs/agents/saf/lga/lga.h | 3 + osaf/libs/agents/saf/lga/lga_api.c | 52 ++++++++++++++++++++++ osaf/libs/agents/saf/lga/lga_mds.c | 69 ++++++++++++++++++++++++++++++ osaf/libs/agents/saf/lga/lga_util.c | 2 + osaf/libs/common/logsv/include/lgsv_defs.h | 6 ++- osaf/libs/common/logsv/include/lgsv_msg.h | 17 +++++- 6 files changed, 144 insertions(+), 5 deletions(-)
V4 specific : Incorporated Lennart & Vu review comments , for more details see the review comments posted on V3 patch. Test added Description: Form CLM integration is supported from Log Service A.02.02. At-least a A.02.02 LGA client will check CLM membership status of client's node. old LGA clients A.02.01 are always clm member. This patch enhanced the log service for Unavailability of the Log Service API on a Non-Member Node which will fail with SA_AIS_ERR_UNAVAILABLE. After this patch the Log Service does not provide service to processes on cluster nodes that are not in the cluster membership. If the node rejoins the cluster membership, processes executing on the node will be able to reinitialize new library handles and use the entire set of Log Service APIs that operate on these new handles; however, invocation of APIs that operate on handles acquired by any process before the node left the membership will continue to fail with SA_AIS_ERR_UNAVAILABLE (or with the special treatment described above for asynchronous calls) with the exception of saLogFinalize(), which is used to free the library handles and all resources associated with these handles. Hence, it is recommended for the processes to finalize the library handles as soon as the processes detect that the node left the membership. Detailed README will be provide soon. Following are expected Log Service API behavior : Case1: On Non-Member Node, Log Service API will fail with code SA_AIS_ERR_UNAVAILABLE (31) Case2: On Member Node after recovered from Non-Member Node, Log Service API will fail with code SA_AIS_ERR_UNAVAILABLE (31) Case3: Non-Member Node + (Headless) Log Service API will fail with code SA_AIS_ERR_UNAVAILABLE (31) Case4: On Non-Member Node + (Headless) + (Head Joined) Log Service API will fail with code SA_AIS_ERR_UNAVAILABLE (31) diff --git a/osaf/libs/agents/saf/lga/lga.h b/osaf/libs/agents/saf/lga/lga.h --- a/osaf/libs/agents/saf/lga/lga.h +++ b/osaf/libs/agents/saf/lga/lga.h @@ -21,6 +21,7 @@ #include <pthread.h> #include <assert.h> #include <saLog.h> +#include <saClm.h> #include <ncs_main_papi.h> #include <ncssysf_ipc.h> @@ -77,6 +78,7 @@ typedef struct lga_client_hdl_rec { * Set when client is initialized an all * streams are recovered */ + bool is_stale_client; /* Status of client based on the CLM status of node.*/ } lga_client_hdl_rec_t; /* States of the server */ @@ -118,6 +120,7 @@ typedef struct { /* LGS LGA sync params */ int lgs_sync_awaited; NCS_SEL_OBJ lgs_sync_sel; + SaClmClusterChangesT clm_node_state; /*Reflects CLM status of this node(for future use).*/ } lga_cb_t; /* lga_saf_api.c */ diff --git a/osaf/libs/agents/saf/lga/lga_api.c b/osaf/libs/agents/saf/lga/lga_api.c --- a/osaf/libs/agents/saf/lga/lga_api.c +++ b/osaf/libs/agents/saf/lga/lga_api.c @@ -325,6 +325,17 @@ SaAisErrorT saLogSelectionObjectGet(SaLo rc = SA_AIS_ERR_BAD_HANDLE; goto done; } + + osaf_mutex_lock_ordie(&lga_cb.cb_lock); + /*Check CLM membership of node.*/ + if (hdl_rec->is_stale_client == true) { + osaf_mutex_unlock_ordie(&lga_cb.cb_lock); + ncshm_give_hdl(logHandle); + TRACE("Node not CLM member or stale client"); + rc = SA_AIS_ERR_UNAVAILABLE; + goto done; + } + osaf_mutex_unlock_ordie(&lga_cb.cb_lock); /* Obtain the selection object from the IPC queue */ sel_obj = m_NCS_IPC_GET_SEL_OBJ(&hdl_rec->mbx); @@ -382,6 +393,17 @@ SaAisErrorT saLogDispatch(SaLogHandleT l goto done; } + osaf_mutex_lock_ordie(&lga_cb.cb_lock); + /*Check CLM membership of node.*/ + if (hdl_rec->is_stale_client == true) { + osaf_mutex_unlock_ordie(&lga_cb.cb_lock); + ncshm_give_hdl(logHandle); + TRACE("Node not CLM member or stale client"); + rc = SA_AIS_ERR_UNAVAILABLE; + goto done; + } + osaf_mutex_unlock_ordie(&lga_cb.cb_lock); + if ((rc = lga_hdl_cbk_dispatch(&lga_cb, hdl_rec, dispatchFlags)) != SA_AIS_OK) TRACE("LGA_DISPATCH_FAILURE"); @@ -770,6 +792,16 @@ SaAisErrorT saLogStreamOpen_2(SaLogHandl goto done; } + osaf_mutex_lock_ordie(&lga_cb.cb_lock); + /*Check CLM membership of node.*/ + if (hdl_rec->is_stale_client == true) { + osaf_mutex_unlock_ordie(&lga_cb.cb_lock); + TRACE("%s Node not CLM member or stale client", __FUNCTION__); + ais_rc = SA_AIS_ERR_UNAVAILABLE; + goto done_give_hdl; + } + osaf_mutex_unlock_ordie(&lga_cb.cb_lock); + /*** * Handle states * Synchronize with mds and recovery thread (mutex) @@ -1188,6 +1220,16 @@ SaAisErrorT saLogWriteLogAsync(SaLogStre goto done_give_hdl_stream; } + osaf_mutex_lock_ordie(&lga_cb.cb_lock); + /*Check CLM membership of node.*/ + if (hdl_rec->is_stale_client == true) { + osaf_mutex_unlock_ordie(&lga_cb.cb_lock); + TRACE("%s Node not CLM member or stale client", __FUNCTION__); + ais_rc = SA_AIS_ERR_UNAVAILABLE; + goto done_give_hdl_all; + } + osaf_mutex_unlock_ordie(&lga_cb.cb_lock); + if ((hdl_rec->reg_cbk.saLogWriteLogCallback == NULL) && (ackFlags == SA_LOG_RECORD_WRITE_ACK)) { TRACE("%s: Write Callback not registered", __FUNCTION__); ais_rc = SA_AIS_ERR_INIT; @@ -1336,6 +1378,16 @@ SaAisErrorT saLogStreamClose(SaLogStream goto done_give_hdl_stream; } + osaf_mutex_lock_ordie(&lga_cb.cb_lock); + /*Check CLM membership of node.*/ + if (hdl_rec->is_stale_client == true) { + osaf_mutex_unlock_ordie(&lga_cb.cb_lock); + TRACE("Node not CLM member or stale client"); + ais_rc = SA_AIS_ERR_UNAVAILABLE; + goto rmv_stream; + } + osaf_mutex_unlock_ordie(&lga_cb.cb_lock); + if (is_lga_state(LGA_NO_SERVER)) { /* No server is available. Remove the stream from client database. * Server side will manage to release resources of this stream when up. diff --git a/osaf/libs/agents/saf/lga/lga_mds.c b/osaf/libs/agents/saf/lga/lga_mds.c --- a/osaf/libs/agents/saf/lga/lga_mds.c +++ b/osaf/libs/agents/saf/lga/lga_mds.c @@ -518,6 +518,41 @@ static uint32_t lga_lgs_msg_proc(lga_cb_ } break; + case LGSV_CLM_NODE_STATUS_CALLBACK: + { + lga_client_hdl_rec_t *lga_hdl_rec; + + TRACE_2("LGSV_CLM_NODE_STATUS_CALLBACK clm_node_status:%d", + lgsv_msg->info.cbk_info.clm_node_status_cbk.clm_node_status); + + /** Lookup the hdl rec by client_id **/ + if (NULL == (lga_hdl_rec = + lga_find_hdl_rec_by_regid(cb, lgsv_msg->info.cbk_info.lgs_client_id))) { + TRACE("regid not found"); + lga_msg_destroy(lgsv_msg); + TRACE_LEAVE(); + return NCSCC_RC_FAILURE; + } + cb->clm_node_state = lgsv_msg->info.cbk_info.clm_node_status_cbk.clm_node_status; + //A client becomes stale if Node loses CLM Membership. + if (cb->clm_node_state != SA_CLM_NODE_JOINED) { + /*If the node rejoins the cluster membership, processes executing on the node will be + able to reinitialize new library handles and use the entire set of Log Service APIs that + operate on these new handles; however, invocation of APIs that operate on handles + acquired by any process before the node left the membership will continue to fail with + SA_AIS_ERR_UNAVAILABLE (or with the special treatment described above for + asynchronous calls) with the exception of saLogFinalize(), which is used to free the + library handles and all resources associated with these handles. Hence, it is recommended + for the processes to finalize the library handles as soon as the processes + detect that the node left the membership.*/ + lga_hdl_rec->is_stale_client = true; + TRACE("CLM_NODE callback is_stale_client: %d clm_node_state: %d", + lga_hdl_rec->is_stale_client, cb->clm_node_state); + } + lga_msg_destroy(lgsv_msg); + } + break; + default: TRACE("unknown type %d", lgsv_msg->info.cbk_info.type); lga_msg_destroy(lgsv_msg); @@ -862,6 +897,35 @@ static uint32_t lga_dec_write_cbk_msg(NC } /**************************************************************************** +Name : lga_dec_clm_node_status_cbk_msg + +Description : This routine decodes message + +Arguments : NCS_UBAID *msg, +LGSV_MSG *msg + +Return Values : uint32_t + +Notes : None. + ******************************************************************************/ +static uint32_t lga_dec_clm_node_status_cbk_msg(NCS_UBAID *uba, lgsv_msg_t *msg) +{ + uint8_t *p8; + uint32_t total_bytes = 0; + logsv_lga_clm_status_cbk_t *param = &msg->info.cbk_info.clm_node_status_cbk; + uint8_t local_data[100]; + + osafassert(uba != NULL); + + p8 = ncs_dec_flatten_space(uba, local_data, 4); + param->clm_node_status = ncs_decode_32bit(&p8); + ncs_dec_skip_space(uba, 4); + total_bytes += 4; + + return total_bytes; +} + +/**************************************************************************** Name : lga_dec_lstr_open_sync_rsp_msg Description : This routine decodes a log stream open sync response message @@ -978,6 +1042,11 @@ static uint32_t lga_mds_dec(struct ncsmd msg->info.cbk_info.lgs_client_id); total_bytes += lga_dec_write_cbk_msg(uba, msg); break; + case LGSV_CLM_NODE_STATUS_CALLBACK: + TRACE_2("decode clm node status message, lgs_client_id=%d", + msg->info.cbk_info.lgs_client_id); + total_bytes += lga_dec_clm_node_status_cbk_msg(uba, msg); + break; default: TRACE_2("Unknown callback type = %d!", msg->info.cbk_info.type); break; diff --git a/osaf/libs/agents/saf/lga/lga_util.c b/osaf/libs/agents/saf/lga/lga_util.c --- a/osaf/libs/agents/saf/lga/lga_util.c +++ b/osaf/libs/agents/saf/lga/lga_util.c @@ -54,6 +54,7 @@ static unsigned int lga_create(void) osaf_mutex_lock_ordie(&lga_cb.cb_lock); lga_cb.lgs_sync_awaited = 0; + lga_cb.clm_node_state = SA_CLM_NODE_JOINED; osaf_mutex_unlock_ordie(&lga_cb.cb_lock); /* No longer needed */ @@ -722,6 +723,7 @@ lga_client_hdl_rec_t *lga_hdl_rec_add(lg **/ rec->lgs_client_id = client_id; + rec->is_stale_client = false; /*** * Initiate the recovery flags * The setting means that the client is initialized and that there is diff --git a/osaf/libs/common/logsv/include/lgsv_defs.h b/osaf/libs/common/logsv/include/lgsv_defs.h --- a/osaf/libs/common/logsv/include/lgsv_defs.h +++ b/osaf/libs/common/logsv/include/lgsv_defs.h @@ -20,7 +20,11 @@ #define LOG_RELEASE_CODE 'A' #define LOG_MAJOR_VERSION 2 -#define LOG_MINOR_VERSION 1 +#define LOG_MINOR_VERSION 2 + +#define LOG_RELEASE_CODE_0 'A' +#define LOG_MAJOR_VERSION_0 2 +#define LOG_MINOR_VERSION_0 1 // Waiting time in library for sync send, unit 10ms #define LGS_WAIT_TIME 1000 diff --git a/osaf/libs/common/logsv/include/lgsv_msg.h b/osaf/libs/common/logsv/include/lgsv_msg.h --- a/osaf/libs/common/logsv/include/lgsv_msg.h +++ b/osaf/libs/common/logsv/include/lgsv_msg.h @@ -45,6 +45,7 @@ typedef enum { /* LGSV Callback enums */ typedef enum { LGSV_WRITE_LOG_CALLBACK_IND = 0, + LGSV_CLM_NODE_STATUS_CALLBACK = 1, LGSV_LGS_CBK_MAX } lgsv_cbk_msg_type_t; @@ -121,12 +122,20 @@ typedef struct { SaAisErrorT error; } lgsv_write_log_callback_ind_t; -/* wrapper structure for all the callbacks */ +/*CLM node status callback structure for LGS*/ +typedef struct logsv_loga_clm_status_param_tag { + uint32_t clm_node_status; +} logsv_lga_clm_status_cbk_t; + +/* wrapper structure for all the callbacks */ typedef struct { - lgsv_cbk_msg_type_t type; /* callback type */ - uint32_t lgs_client_id; /* lgs client_id */ - SaInvocationT inv; /* invocation value */ + lgsv_cbk_msg_type_t type; /* callback type */ + uint32_t lgs_client_id; /* lgs client_id */ + SaInvocationT inv; /* invocation value */ + /* union {*/ lgsv_write_log_callback_ind_t write_cbk; + logsv_lga_clm_status_cbk_t clm_node_status_cbk; + /* } param; */ } lgsv_cbk_info_t; /* API Response parameter definitions */ ------------------------------------------------------------------------------ _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel