Hi Vu, Mathi has a point here. It is actually possible to serve a stream close even when headless. In state LGA_NO_SERVER the stream can be removed from the agent's client database without sending a message to the (non-existent) server. In IMM a stream object will still exist, and if this was the only client, the stream object should have been removed by the server. This means that we now have a "dead" stream object. However, there is a mechanism in the server that will clean up such objects after a timeout.
Regards Lennart > -----Original Message----- > From: Mathivanan Naickan Palanivelu [mailto:[email protected]] > Sent: den 23 februari 2016 13:44 > To: Vu Minh Nguyen > Cc: Lennart Lund; [email protected]; Anders Widell > Subject: Re: [PATCH 2 of 4] log: add support for cloud resilience feature > (agent part) [#1179] > > Hi Vu, > > Ack for patch 2. > > B.T.w, What is the idea behind returning try_again for streamclose(), i.e. as > below? > > + if (lga_state == LGA_NO_SERVER) { > + /* We have no server and cannot write. The client may try > again > + */ > + TRACE("%s No server", __FUNCTION__); > + ais_rc = SA_AIS_ERR_TRY_AGAIN; > + goto done_give_hdl_stream; > + } > + > > > > Mathi. > > ----- [email protected] wrote: > > > osaf/libs/agents/saf/lga/Makefile.am | 6 +- > > osaf/libs/agents/saf/lga/lga.h | 53 +- > > osaf/libs/agents/saf/lga/lga_api.c | 858 > > ++++++++++++++++++++++++---------- > > osaf/libs/agents/saf/lga/lga_mds.c | 46 +- > > osaf/libs/agents/saf/lga/lga_state.c | 670 > > +++++++++++++++++++++++++++ > > osaf/libs/agents/saf/lga/lga_state.h | 41 + > > osaf/libs/agents/saf/lga/lga_util.c | 94 +++- > > 7 files changed, 1479 insertions(+), 289 deletions(-) > > > > > > The patch makes LOG service be able to handle the case that both SC > > nodes > > are down at the same time. > > When one or both nodes go up again the log service must be able to > > resume its work preferably without actions by the clients. > > > > A log client should not have to be aware of if one or both SC nodes > > are down. > > The only thing that should happen is that a TRY AGAIN (and in some > > cases TIMEOUT) returned. > > It is the responsibility of the client to decide how to handle this. 
> > > > diff --git a/osaf/libs/agents/saf/lga/Makefile.am > > b/osaf/libs/agents/saf/lga/Makefile.am > > --- a/osaf/libs/agents/saf/lga/Makefile.am > > +++ b/osaf/libs/agents/saf/lga/Makefile.am > > @@ -20,7 +20,8 @@ include $(top_srcdir)/Makefile.common > > MAINTAINERCLEANFILES = Makefile.in > > > > noinst_HEADERS = \ > > - lga.h > > + lga.h \ > > + lga_state.h > > > > noinst_LTLIBRARIES = liblga.la > > > > @@ -31,6 +32,7 @@ liblga_la_CPPFLAGS = \ > > liblga_la_SOURCES = \ > > lga_api.c \ > > lga_util.c \ > > - lga_mds.c > > + lga_mds.c \ > > + lga_state.c > > > > liblga_la_LDFLAGS = -static > > diff --git a/osaf/libs/agents/saf/lga/lga.h > > b/osaf/libs/agents/saf/lga/lga.h > > --- a/osaf/libs/agents/saf/lga/lga.h > > +++ b/osaf/libs/agents/saf/lga/lga.h > > @@ -29,6 +29,7 @@ > > #include <ncsencdec_pub.h> > > #include <ncs_util.h> > > #include <logtrace.h> > > +#include <osaf_time.h> > > > > #include "lgsv_msg.h" > > #include "lgsv_defs.h" > > @@ -49,6 +50,11 @@ typedef struct lga_log_stream_hdl_rec { > > unsigned int lgs_log_stream_id; /* server reference > for this log > > stream */ > > struct lga_log_stream_hdl_rec *next; /* next pointer for > the list in > > lga_client_hdl_rec_t */ > > struct lga_client_hdl_rec *parent_hdl; /* Back Pointer to > the client > > instantiation */ > > + /* This flag is used with recovery handling. It is valid only > > + * after server down has happened (will be initiated when > server > > down > > + * event occurs). It's not valid in LGA_NORMAL state. > > + */ > > + bool recovered_flag; > > } lga_log_stream_hdl_rec_t; > > > > /* LGA client record */ > > @@ -59,8 +65,46 @@ typedef struct lga_client_hdl_rec { > > lga_log_stream_hdl_rec_t *stream_list; /* List of open > streams per > > client */ > > SYSF_MBX mbx; /* priority q mbx > b/w MDS & Library */ > > struct lga_client_hdl_rec *next; /* next pointer for > the list in > > lga_cb_t */ > > + /* These flags are used with recovery handling. 
They are valid > only > > + * after server down has happened (will be initiated when > server > > down > > + * event occurs). They are not valid in LGA_NORMAL state > > + */ > > + bool initialized_flag; /* Used with "headless" recovery > > handling > > + * Set when client > is initialized. Streams > > + * may not have > been recovered > > + */ > > + bool recovered_flag; /* Used with "headless" recovery > > handling > > + * Set when client > is initialized an all > > + * streams are > recovered > > + */ > > } lga_client_hdl_rec_t; > > > > +/* States of the server */ > > +typedef enum { > > + LGS_START, /* The state before agent is started > > + */ > > + LGS_DOWN, /* Server is down (headless) > > + */ > > + LGS_NO_ACTIVE, /* No active server (switch/fail - over) > > + */ > > + LGS_UP /* Server is up > > + */ > > +} lgs_state_t; > > + > > +/* Agent internal states */ > > +typedef enum { > > + LGA_NORMAL, /* Server is up and no recovery is ongoing > > + */ > > + LGA_NO_SERVER, /* No Server (Server down) state > > + */ > > + LGA_RECOVERY1, /* Server is up. Recover clients and streams > when > > + * request from client. > > + * Recovery1 timer is running > > + */ > > + LGA_RECOVERY2 /* Auto recover remaining clients and > streams > > + * After recovery1 timeout > > + */ > > +} lga_state_t; > > /* > > * The LGA control block is the master anchor structure for all LGA > > * instantiations within a process. 
> > @@ -70,8 +114,8 @@ typedef struct { > > lga_client_hdl_rec_t *client_list; /* LGA client > handle database */ > > MDS_HDL mds_hdl; /* MDS handle */ > > MDS_DEST lgs_mds_dest; /* LGS absolute/virtual address */ > > - int lgs_up; /* Indicate that MDS subscription > > - * is complete */ > > + lgs_state_t lgs_state; /* Indicate current server MDS > state */ > > + lga_state_t lga_state; /* Indicate current state of the agent */ > > /* LGS LGA sync params */ > > int lgs_sync_awaited; > > NCS_SEL_OBJ lgs_sync_sel; > > @@ -88,8 +132,9 @@ extern uint32_t lga_mds_msg_async_send(l > > extern void lgsv_lga_evt_free(struct lgsv_msg *); > > > > /* lga_init.c */ > > -extern unsigned int lga_startup(void); > > -extern unsigned int lga_shutdown(void); > > +unsigned int lga_startup(lga_cb_t *cb); > > +extern unsigned int lga_shutdown_after_last_client(void); > > +extern unsigned int lga_force_shutdown(void); > > > > /* lga_hdl.c */ > > extern SaAisErrorT lga_hdl_cbk_dispatch(lga_cb_t *, > > lga_client_hdl_rec_t *, SaDispatchFlagsT); > > diff --git a/osaf/libs/agents/saf/lga/lga_api.c > > b/osaf/libs/agents/saf/lga/lga_api.c > > --- a/osaf/libs/agents/saf/lga/lga_api.c > > +++ b/osaf/libs/agents/saf/lga/lga_api.c > > @@ -16,8 +16,11 @@ > > */ > > > > #include <string.h> > > +#include <saf_error.h> > > #include "lga.h" > > > > +#include "lga_state.h" > > + > > #define NCS_SAF_MIN_ACCEPT_TIME 10 > > > > /* Macro to validate the dispatch flags */ > > @@ -38,6 +41,8 @@ > > /* The main controle block */ > > lga_cb_t lga_cb = { > > .cb_lock = PTHREAD_MUTEX_INITIALIZER, > > + .lga_state = LGA_NORMAL, > > + .lgs_state = LGS_START > > }; > > > > static void populate_open_params(lgsv_stream_open_req_t > *open_param, > > @@ -116,20 +121,16 @@ SaAisErrorT saLogInitialize(SaLogHandleT > > { > > lga_client_hdl_rec_t *lga_hdl_rec; > > lgsv_msg_t i_msg, *o_msg; > > - SaAisErrorT rc; > > - uint32_t client_id; > > + SaAisErrorT ais_rc = SA_AIS_OK; > > + int rc; > > + uint32_t client_id = 0; > > > 
> TRACE_ENTER(); > > > > + /* Verify parameters (log handle and that version is given) */ > > if ((logHandle == NULL) || (version == NULL)) { > > TRACE("version or handle FAILED"); > > - rc = SA_AIS_ERR_INVALID_PARAM; > > - goto done; > > - } > > - > > - if ((rc = lga_startup()) != NCSCC_RC_SUCCESS) { > > - TRACE("lga_startup FAILED: %u", rc); > > - rc = SA_AIS_ERR_LIBRARY; > > + ais_rc = SA_AIS_ERR_INVALID_PARAM; > > goto done; > > } > > > > @@ -144,15 +145,59 @@ SaAisErrorT saLogInitialize(SaLogHandleT > > version->releaseCode = LOG_RELEASE_CODE; > > version->majorVersion = > LOG_MAJOR_VERSION; > > version->minorVersion = > LOG_MINOR_VERSION; > > - lga_shutdown(); > > - rc = SA_AIS_ERR_VERSION; > > + lga_shutdown_after_last_client(); > > + ais_rc = SA_AIS_ERR_VERSION; > > goto done; > > } > > > > - if (!lga_cb.lgs_up) { > > - lga_shutdown(); > > - TRACE("LGS server is down"); > > - rc = SA_AIS_ERR_TRY_AGAIN; > > + /*** > > + * Handle states > > + * Synchronize with mds and recovery thread (mutex) > > + * NOTE: Nothing to handle if recovery state 1 > > + */ > > + pthread_mutex_lock(&lga_cb.cb_lock); > > + lgs_state_t lgs_state = lga_cb.lgs_state; > > + lga_state_t lga_state = lga_cb.lga_state; > > + pthread_mutex_unlock(&lga_cb.cb_lock); > > + > > + if (lgs_state == LGS_NO_ACTIVE) { > > + /* We have a server but it is temporary > unavailable. Client may > > + * try again > > + */ > > + TRACE("%s LGS no active", __FUNCTION__); > > + ais_rc = SA_AIS_ERR_TRY_AGAIN; > > + goto done; > > + } > > + > > + if (lga_state == LGA_NO_SERVER) { > > + /* We have no server and cannot initialize. > > + * The client may try again > > + */ > > + TRACE("%s No server", __FUNCTION__); > > + ais_rc = SA_AIS_ERR_TRY_AGAIN; > > + goto done; > > + } > > + > > + if (lga_state == LGA_RECOVERY2) { > > + /* Auto recovery is ongoing. We have to wait for > it to finish. 
> > + * The client may try again > > + */ > > + TRACE("%s LGA auto recovery ongoing (2)", > __FUNCTION__); > > + ais_rc = SA_AIS_ERR_TRY_AGAIN; > > + goto done; > > + } > > + > > + /*** > > + * Do Initiate. It's ok to initiate in recovery state 1 since we > > have a > > + * server and is not conflicting with auto recovery > > + */ > > + > > + /* Initiate the client in the agent and if first client also start > > MDS > > + * etc. > > + */ > > + if ((rc = lga_startup(&lga_cb)) != NCSCC_RC_SUCCESS) { > > + TRACE("lga_startup FAILED: %u", rc); > > + ais_rc = SA_AIS_ERR_LIBRARY; > > goto done; > > } > > > > @@ -165,18 +210,18 @@ SaAisErrorT saLogInitialize(SaLogHandleT > > /* Send a message to LGS to obtain a client_id/server ref id > which > > is cluster > > * wide unique. > > */ > > - rc = lga_mds_msg_sync_send(&lga_cb, &i_msg, &o_msg, > > LGS_WAIT_TIME,MDS_SEND_PRIORITY_HIGH); > > + rc = lga_mds_msg_sync_send(&lga_cb, &i_msg, &o_msg, > LGS_WAIT_TIME, > > MDS_SEND_PRIORITY_HIGH); > > if (rc != NCSCC_RC_SUCCESS) { > > - lga_shutdown(); > > - rc = SA_AIS_ERR_TRY_AGAIN; > > + lga_shutdown_after_last_client(); > > + ais_rc = SA_AIS_ERR_TRY_AGAIN; > > goto done; > > } > > > > /** Make sure the LGS return status was SA_AIS_OK > > **/ > > - if (SA_AIS_OK != o_msg->info.api_resp_info.rc) { > > - rc = o_msg->info.api_resp_info.rc; > > - TRACE("LGS return FAILED"); > > + ais_rc = o_msg->info.api_resp_info.rc; > > + if (SA_AIS_OK != ais_rc) { > > + TRACE("%s LGS error response %s", > __FUNCTION__, > > saf_error(ais_rc)); > > goto err; > > } > > > > @@ -189,27 +234,26 @@ SaAisErrorT saLogInitialize(SaLogHandleT > > /* create the hdl record & store the callbacks */ > > lga_hdl_rec = lga_hdl_rec_add(&lga_cb, callbacks, client_id); > > if (lga_hdl_rec == NULL) { > > - rc = SA_AIS_ERR_NO_MEMORY; > > + ais_rc = SA_AIS_ERR_NO_MEMORY; > > goto err; > > } > > > > /* pass the handle value to the appl */ > > - if (SA_AIS_OK == rc) > > - *logHandle = lga_hdl_rec->local_hdl; > > + *logHandle 
= lga_hdl_rec->local_hdl; > > > > err: > > /* free up the response message */ > > if (o_msg) > > lga_msg_destroy(o_msg); > > > > - if (rc != SA_AIS_OK) { > > - TRACE_2("LGA INIT FAILED\n"); > > - lga_shutdown(); > > + if (ais_rc != SA_AIS_OK) { > > + TRACE_2("%s LGA INIT FAILED\n", > __FUNCTION__); > > + lga_shutdown_after_last_client(); > > } > > > > done: > > - TRACE_LEAVE(); > > - return rc; > > + TRACE_LEAVE2("client_id = %d", client_id); > > + return ais_rc; > > } > > > > > > > /********************************************************** > ***************** > > @@ -325,6 +369,61 @@ SaAisErrorT saLogDispatch(SaLogHandleT l > > return rc; > > } > > > > > +/********************************************************* > ********************* > > + * Finalize > > + * API function and help functions > > + > > > ********************************************************** > ********************/ > > + > > +/** > > + * Create and send a Finalize message to the server > > + * > > + * @param hdl_rec > > + * @return AIS return code > > + */ > > +static SaAisErrorT send_Finalize_msg(lga_client_hdl_rec_t *hdl_rec) > > +{ > > + uint32_t mds_rc; > > + lgsv_msg_t msg, *o_msg = NULL; > > + SaAisErrorT ais_rc = SA_AIS_OK; > > + > > + TRACE_ENTER(); > > + > > + memset(&msg, 0, sizeof(lgsv_msg_t)); > > + msg.type = LGSV_LGA_API_MSG; > > + msg.info.api_info.type = LGSV_FINALIZE_REQ; > > + msg.info.api_info.param.finalize.client_id = > > hdl_rec->lgs_client_id; > > + > > + mds_rc = lga_mds_msg_sync_send( > > + &lga_cb, &msg, > > + &o_msg, > > + LGS_WAIT_TIME, > > + MDS_SEND_PRIORITY_MEDIUM > > + ); > > + switch (mds_rc) { > > + case NCSCC_RC_SUCCESS: > > + break; > > + case NCSCC_RC_REQ_TIMOUT: > > + ais_rc = SA_AIS_ERR_TIMEOUT; > > + TRACE("lga_mds_msg_sync_send FAILED: %s", > saf_error(ais_rc)); > > + goto done; > > + default: > > + TRACE("lga_mds_msg_sync_send FAILED: %s", > saf_error(ais_rc)); > > + ais_rc = SA_AIS_ERR_NO_RESOURCES; > > + goto done; > > + } > > + > > + if (o_msg != 
NULL) { > > + ais_rc = o_msg->info.api_resp_info.rc; > > + lga_msg_destroy(o_msg); > > + } else > > + ais_rc = SA_AIS_ERR_NO_RESOURCES; > > + > > + done: > > + > > + TRACE_LEAVE(); > > + return ais_rc; > > +} > > + > > > > > /********************************************************** > ***************** > > * 8.4.4 > > * > > @@ -350,84 +449,119 @@ SaAisErrorT saLogDispatch(SaLogHandleT l > > SaAisErrorT saLogFinalize(SaLogHandleT logHandle) > > { > > lga_client_hdl_rec_t *hdl_rec; > > - lgsv_msg_t msg, *o_msg = NULL; > > - SaAisErrorT rc = SA_AIS_OK; > > - uint32_t mds_rc; > > + SaAisErrorT ais_rc = SA_AIS_OK; > > + uint32_t rc; > > > > TRACE_ENTER(); > > > > /* retrieve hdl rec */ > > hdl_rec = ncshm_take_hdl(NCS_SERVICE_ID_LGA, logHandle); > > if (hdl_rec == NULL) { > > - TRACE("ncshm_take_hdl failed"); > > - rc = SA_AIS_ERR_BAD_HANDLE; > > + TRACE("%s ncshm_take_hdl failed", > __FUNCTION__); > > + ais_rc = SA_AIS_ERR_BAD_HANDLE; > > goto done; > > } > > > > - /* Check Whether LGS is up or not */ > > - if (!lga_cb.lgs_up) { > > - TRACE("LGS down"); > > - rc = SA_AIS_ERR_TRY_AGAIN; > > + /*** > > + * Handle states > > + * Synchronize with mds and recovery thread (mutex) > > + */ > > + pthread_mutex_lock(&lga_cb.cb_lock); > > + lgs_state_t lgs_state = lga_cb.lgs_state; > > + lga_state_t lga_state = lga_cb.lga_state; > > + pthread_mutex_unlock(&lga_cb.cb_lock); > > + > > + if (lgs_state == LGS_NO_ACTIVE) { > > + /* We have a server but it is temporary > unavailable. Client may > > + * try again > > + */ > > + TRACE("%s lgs_state = LGS no active", > __FUNCTION__); > > + ais_rc = SA_AIS_ERR_TRY_AGAIN; > > goto done_give_hdl; > > } > > > > - /** populate & send the finalize message > > - ** and make sure the finalize from the server > > - ** end returned before deleting the local records. 
> > - **/ > > - memset(&msg, 0, sizeof(lgsv_msg_t)); > > - msg.type = LGSV_LGA_API_MSG; > > - msg.info.api_info.type = LGSV_FINALIZE_REQ; > > - msg.info.api_info.param.finalize.client_id = > > hdl_rec->lgs_client_id; > > - > > - mds_rc = lga_mds_msg_sync_send(&lga_cb, &msg, &o_msg, > > LGS_WAIT_TIME,MDS_SEND_PRIORITY_MEDIUM); > > - switch (mds_rc) { > > - case NCSCC_RC_SUCCESS: > > - break; > > - case NCSCC_RC_REQ_TIMOUT: > > - rc = SA_AIS_ERR_TIMEOUT; > > - TRACE("lga_mds_msg_sync_send FAILED: %u", > rc); > > - goto done_give_hdl; > > - default: > > - TRACE("lga_mds_msg_sync_send FAILED: %u", > rc); > > - rc = SA_AIS_ERR_NO_RESOURCES; > > + if (lga_state == LGA_NO_SERVER) { > > + /* We have no server but can still finalize client. > > + * In this situation no message to server is sent > > + */ > > + TRACE("%s lga_state = LGS down", > __FUNCTION__); > > + ais_rc = SA_AIS_OK; > > goto done_give_hdl; > > } > > > > - if (o_msg != NULL) { > > - rc = o_msg->info.api_resp_info.rc; > > - lga_msg_destroy(o_msg); > > - } else > > - rc = SA_AIS_ERR_NO_RESOURCES; > > + if (lga_state == LGA_RECOVERY2) { > > + /* Auto recovery is ongoing. We have to wait for > it to finish. > > + * The client may try again > > + */ > > + TRACE("%s lga_state = LGA auto recovery > ongoing (2)", > > __FUNCTION__); > > + ais_rc = SA_AIS_ERR_TRY_AGAIN; > > + goto done_give_hdl; > > + } > > > > - if (rc == SA_AIS_OK) { > > - rc = lga_hdl_rec_del(&lga_cb.client_list, > hdl_rec); > > - if (rc != NCSCC_RC_SUCCESS) > > - rc = SA_AIS_ERR_BAD_HANDLE; > > + if (lga_state == LGA_RECOVERY1) { > > + /* We are in recovery state 1. Client may or may > not have been > > + * initialized. 
If initialized a finalize request must > be sent > > + * to the server else the client is finalized in the > agent only > > + */ > > + TRACE("%s lga_state = Recovery state (1)", > __FUNCTION__); > > + if (hdl_rec->initialized_flag == false) { > > + TRACE("\t Client is not > initialized"); > > + goto done_give_hdl; > > + } > > + TRACE("\t Client is initialized"); > > + } > > + > > + /*** > > + * Populate & send the finalize message > > + * and make sure the finalize from the server > > + * end returned before deleting the local records. > > + */ > > + ais_rc = send_Finalize_msg(hdl_rec); > > + > > + if (ais_rc == SA_AIS_OK) { > > + TRACE("%s delete_one_client", > __FUNCTION__); > > + (void) delete_one_client(&lga_cb.client_list, > hdl_rec); > > } > > > > done_give_hdl: > > ncshm_give_hdl(logHandle); > > > > - if (rc == SA_AIS_OK) { > > - rc = lga_shutdown(); > > + if (ais_rc == SA_AIS_OK) { > > + rc = lga_shutdown_after_last_client(); > > if (rc != NCSCC_RC_SUCCESS) > > TRACE("lga_shutdown "); > > } > > > > done: > > - TRACE_LEAVE2("rc = %u", rc); > > - return rc; > > + TRACE_LEAVE2("ais_rc = %s", saf_error(ais_rc)); > > + return ais_rc; > > } > > > > -static SaAisErrorT validate_open_params(SaLogHandleT logHandle, > > - const > SaNameT *logStreamName, > > - const > SaLogFileCreateAttributesT_2 *logFileCreateAttributes, > > - > SaLogStreamOpenFlagsT logStreamOpenFlags, > > - > SaTimeT timeOut, SaLogStreamHandleT *logStreamHandle, > uint32_t > > *header_type) > > > +/********************************************************* > ********************* > > + * Open a stream > > + * API function and help functions > > + > > > ********************************************************** > ********************/ > > + > > +/** > > + * Check input parameters for opening a stream > > + * > > + * @param logStreamName > > + * @param logFileCreateAttributes > > + * @param logStreamOpenFlags > > + * @param logStreamHandle > > + * @param header_type > > + * @return > > + */ > > 
+static SaAisErrorT validate_open_params( > > + const SaNameT *logStreamName, > > + const SaLogFileCreateAttributesT_2 *logFileCreateAttributes, > > + SaLogStreamOpenFlagsT logStreamOpenFlags, > > + SaLogStreamHandleT *logStreamHandle, > > + uint32_t *header_type > > + ) > > { > > size_t len; > > - SaAisErrorT rc = SA_AIS_OK; > > + SaAisErrorT ais_rc = SA_AIS_OK; > > > > TRACE_ENTER(); > > > > @@ -435,7 +569,7 @@ static SaAisErrorT validate_open_params( > > > > if ((NULL == logStreamName) || (NULL == logStreamHandle)) { > > TRACE("SA_AIS_ERR_INVALID_PARAM => NULL > pointer check"); > > - rc = SA_AIS_ERR_INVALID_PARAM; > > + ais_rc = SA_AIS_ERR_INVALID_PARAM; > > goto done; > > } > > > > @@ -450,7 +584,7 @@ static SaAisErrorT validate_open_params( > > */ > > if (logStreamName->length >= SA_MAX_NAME_LENGTH) { > > TRACE("SA_AIS_ERR_INVALID_PARAM, > logStreamName->length > > > SA_MAX_NAME_LENGTH"); > > - rc = SA_AIS_ERR_INVALID_PARAM; > > + ais_rc = SA_AIS_ERR_INVALID_PARAM; > > goto done; > > } > > > > @@ -575,10 +709,11 @@ static SaAisErrorT validate_open_params( > > > > done: > > TRACE_LEAVE(); > > - return rc; > > + return ais_rc; > > } > > > > /** > > + * API function for opening a stream > > * > > * @param logHandle > > * @param logStreamName > > @@ -599,29 +734,85 @@ SaAisErrorT saLogStreamOpen_2(SaLogHandl > > lga_client_hdl_rec_t *hdl_rec; > > lgsv_msg_t msg, *o_msg = NULL; > > lgsv_stream_open_req_t *open_param; > > - SaAisErrorT rc; > > + SaAisErrorT ais_rc; > > + int rc = 0; > > + uint32_t ncs_rc; > > uint32_t timeout; > > uint32_t log_stream_id; > > uint32_t log_header_type = 0; > > > > TRACE_ENTER(); > > > > - rc = validate_open_params(logHandle, logStreamName, > > logFileCreateAttributes, > > - > logStreamOpenFlags, timeOut, logStreamHandle, > > &log_header_type); > > - if (rc != SA_AIS_OK) > > + ais_rc = validate_open_params(logStreamName, > > logFileCreateAttributes, > > + > logStreamOpenFlags, logStreamHandle, &log_header_type); > > + if (ais_rc != 
SA_AIS_OK) > > goto done; > > > > /* retrieve log service hdl rec */ > > hdl_rec = ncshm_take_hdl(NCS_SERVICE_ID_LGA, logHandle); > > if (hdl_rec == NULL) { > > TRACE("ncshm_take_hdl failed"); > > - rc = SA_AIS_ERR_BAD_HANDLE; > > + ais_rc = SA_AIS_ERR_BAD_HANDLE; > > goto done; > > } > > > > + /*** > > + * Handle states > > + * Synchronize with mds and recovery thread (mutex) > > + */ > > + pthread_mutex_lock(&lga_cb.cb_lock); > > + lgs_state_t lgs_state = lga_cb.lgs_state; > > + lga_state_t lga_state = lga_cb.lga_state; > > + pthread_mutex_unlock(&lga_cb.cb_lock); > > + > > + if (lgs_state == LGS_NO_ACTIVE) { > > + /* We have a server but it is temporary > unavailable. Client may > > + * try again > > + */ > > + TRACE("%s LGS no active", __FUNCTION__); > > + ais_rc = SA_AIS_ERR_TRY_AGAIN; > > + goto done_give_hdl; > > + } > > + > > + if (lga_state == LGA_NO_SERVER) { > > + /* We have no server and cannot open a > stream. > > + * The client may try again > > + */ > > + TRACE("%s No server", __FUNCTION__); > > + ais_rc = SA_AIS_ERR_TRY_AGAIN; > > + goto done_give_hdl; > > + } > > + > > + if (lga_state == LGA_RECOVERY2) { > > + /* Auto recovery is ongoing. We have to wait for > it to finish. > > + * The client may try again > > + */ > > + TRACE("%s LGA auto recovery ongoing (2)", > __FUNCTION__); > > + ais_rc = SA_AIS_ERR_TRY_AGAIN; > > + goto done_give_hdl; > > + } > > + > > + if (lga_state == LGA_RECOVERY1) { > > + /* We are in recovery 1 state. > > + * Recover client and execute the request > > + */ > > + rc = recover_one_client(hdl_rec); > > + if (rc == -1) { > > + /* Client could not be recovered. 
> Delete client and > > + * return BAD HANDLE > > + */ > > + TRACE("%s delete_one_client", > __FUNCTION__); > > + (void) > delete_one_client(&lga_cb.client_list, hdl_rec); > > + ais_rc = > SA_AIS_ERR_BAD_HANDLE; > > + /* Handles are destroyed so we > shall not give handles */ > > + goto done; > > + } > > + } > > + > > + > > /** Populate a sync MDS message to obtain a log stream id and an > > - ** instance open id. > > - **/ > > + * instance open id. > > + */ > > memset(&msg, 0, sizeof(lgsv_msg_t)); > > msg.type = LGSV_LGA_API_MSG; > > msg.info.api_info.type = LGSV_STREAM_OPEN_REQ; > > @@ -641,7 +832,7 @@ SaAisErrorT saLogStreamOpen_2(SaLogHandl > > /* Construct the logFileName */ > > open_param->logFileName = (char *) > > malloc(strlen(logFileCreateAttributes->logFileName) + 1); > > if (open_param->logFileName == NULL) { > > - rc = SA_AIS_ERR_NO_MEMORY; > > + ais_rc = > SA_AIS_ERR_NO_MEMORY; > > goto done_give_hdl; > > } > > strcpy(open_param->logFileName, > > logFileCreateAttributes->logFileName); > > @@ -653,7 +844,7 @@ SaAisErrorT saLogStreamOpen_2(SaLogHandl > > > > open_param->logFilePathName = (char *) > malloc(len); > > if (open_param->logFilePathName == NULL) { > > - rc = SA_AIS_ERR_NO_MEMORY; > > + ais_rc = > SA_AIS_ERR_NO_MEMORY; > > goto done_give_hdl; > > } > > > > @@ -668,31 +859,21 @@ SaAisErrorT saLogStreamOpen_2(SaLogHandl > > > > if (timeout < NCS_SAF_MIN_ACCEPT_TIME) { > > TRACE("Timeout"); > > - rc = SA_AIS_ERR_TIMEOUT; > > - goto done_give_hdl; > > - } > > - > > - /* Check whether LGS is up or not */ > > - if (!lga_cb.lgs_up) { > > - TRACE("LGS down"); > > - rc = SA_AIS_ERR_TRY_AGAIN; > > + ais_rc = SA_AIS_ERR_TIMEOUT; > > goto done_give_hdl; > > } > > > > /* Send a sync MDS message to obtain a log stream id */ > > - rc = lga_mds_msg_sync_send(&lga_cb, &msg, &o_msg, > > timeout,MDS_SEND_PRIORITY_HIGH); > > - if (rc != NCSCC_RC_SUCCESS) { > > - if (o_msg) > > - lga_msg_destroy(o_msg); > > - rc = SA_AIS_ERR_TRY_AGAIN; > > + ncs_rc = 
lga_mds_msg_sync_send(&lga_cb, &msg, &o_msg, > timeout, > > MDS_SEND_PRIORITY_HIGH); > > + if (ncs_rc != NCSCC_RC_SUCCESS) { > > + ais_rc = SA_AIS_ERR_TRY_AGAIN; > > goto done_give_hdl; > > } > > > > - if (SA_AIS_OK != o_msg->info.api_resp_info.rc) { > > - rc = o_msg->info.api_resp_info.rc; > > - TRACE("Bad return status!!! rc = %d", rc); > > - if (o_msg) > > - lga_msg_destroy(o_msg); > > + ais_rc = o_msg->info.api_resp_info.rc; > > + if (SA_AIS_OK != ais_rc) { > > + TRACE("Bad return status!!! rc = %d", ais_rc); > > + lga_msg_destroy(o_msg); > > goto done_give_hdl; > > } > > > > @@ -708,25 +889,28 @@ SaAisErrorT saLogStreamOpen_2(SaLogHandl > > /** Allocate an LGA_LOG_STREAM_HDL_REC structure and insert this > > * into the list of channel hdl record. > > **/ > > - lstr_hdl_rec = lga_log_stream_hdl_rec_add(&hdl_rec, > > - > log_stream_id, logStreamOpenFlags, logStreamName, > > log_header_type); > > + lstr_hdl_rec = lga_log_stream_hdl_rec_add( > > + &hdl_rec, > > + log_stream_id, logStreamOpenFlags, > > + logStreamName, log_header_type > > + ); > > if (lstr_hdl_rec == NULL) { > > pthread_mutex_unlock(&lga_cb.cb_lock); > > - if (o_msg) > > - lga_msg_destroy(o_msg); > > - rc = SA_AIS_ERR_NO_MEMORY; > > + lga_msg_destroy(o_msg); > > + ais_rc = SA_AIS_ERR_NO_MEMORY; > > goto done_give_hdl; > > } > > + > > /** UnLock LGA_CB > > **/ > > pthread_mutex_unlock(&lga_cb.cb_lock); > > > > - /** Give the hdl-mgr allocated hdl to the application. 
> > - **/ > > + /** Give the hdl-mgr allocated hdl to the application and free > the > > response > > + * message > > + **/ > > *logStreamHandle = > > (SaLogStreamHandleT)lstr_hdl_rec->log_stream_hdl; > > > > - if (o_msg) > > - lga_msg_destroy(o_msg); > > + lga_msg_destroy(o_msg); > > > > done_give_hdl: > > ncshm_give_hdl(logHandle); > > @@ -735,7 +919,7 @@ SaAisErrorT saLogStreamOpen_2(SaLogHandl > > > > done: > > TRACE_LEAVE(); > > - return rc; > > + return ais_rc; > > } > > > > /** > > @@ -758,6 +942,136 @@ SaAisErrorT saLogStreamOpenAsync_2(SaLog > > return SA_AIS_ERR_NOT_SUPPORTED; > > } > > > > > +/********************************************************* > ********************* > > + * Write Log record > > + * API function and help functions > > + > > > ********************************************************** > ********************/ > > + > > +/** > > + * Validate the log record and if generic header add > > + * logSvcUsrName from environment variable > SA_AMF_COMPONENT_NAME > > + * > > + * @param logRecord[in] > > + * @param logSvcUsrName[out] > > + * @param write_param[out] > > + * @return AIS return code > > + */ > > +static SaAisErrorT handle_log_record(const SaLogRecordT *logRecord, > > + SaNameT *logSvcUsrName, > > + lgsv_write_log_async_req_t *write_param) > > +{ > > + SaAisErrorT ais_rc = SA_AIS_OK; > > + SaTimeT logTimeStamp; > > + > > + TRACE_ENTER(); > > + > > + if (NULL == logRecord) { > > + TRACE("SA_AIS_ERR_INVALID_PARAM => NULL > pointer check"); > > + ais_rc = SA_AIS_ERR_INVALID_PARAM; > > + goto done; > > + } > > + > > + if (logRecord->logHdrType == SA_LOG_GENERIC_HEADER) { > > + switch (logRecord- > >logHeader.genericHdr.logSeverity) { > > + case SA_LOG_SEV_EMERGENCY: > > + case SA_LOG_SEV_ALERT: > > + case SA_LOG_SEV_CRITICAL: > > + case SA_LOG_SEV_ERROR: > > + case SA_LOG_SEV_WARNING: > > + case SA_LOG_SEV_NOTICE: > > + case SA_LOG_SEV_INFO: > > + break; > > + default: > > + TRACE("Invalid severity: %x", > > + logRecord- > 
>logHeader.genericHdr.logSeverity); > > + ais_rc = > SA_AIS_ERR_INVALID_PARAM; > > + goto done; > > + } > > + } > > + > > + if (logRecord->logBuffer != NULL) { > > + if ((logRecord->logBuffer->logBuf == NULL) && > > + (logRecord->logBuffer- > >logBufSize != 0)) { > > + TRACE("logBuf == NULL && > logBufSize != 0"); > > + ais_rc = > SA_AIS_ERR_INVALID_PARAM; > > + goto done; > > + } > > + } > > + > > + /* Set timeStamp data if not provided by application user */ > > + if (logRecord->logTimeStamp == SA_TIME_UNKNOWN) { > > + logTimeStamp = setLogTime(); > > + write_param->logTimeStamp = &logTimeStamp; > > + } else { > > + write_param->logTimeStamp = (SaTimeT > *)&logRecord->logTimeStamp; > > + } > > + > > + /* SA_AIS_ERR_INVALID_PARAM, bullet 2 in SAI-AIS-LOG- > A.02.01 > > + Section 3.6.3, Return Values */ > > + if (logRecord->logHdrType == SA_LOG_GENERIC_HEADER) { > > + if (logRecord- > >logHeader.genericHdr.logSvcUsrName == NULL) { > > + char *logSvcUsrChars = NULL; > > + TRACE("logSvcUsrName == > NULL"); > > + logSvcUsrChars = > getenv("SA_AMF_COMPONENT_NAME"); > > + if (logSvcUsrChars == NULL) { > > + ais_rc = > SA_AIS_ERR_INVALID_PARAM; > > + goto done; > > + } > > + logSvcUsrName->length = > strlen(logSvcUsrChars); > > + if (logSvcUsrName->length >= > SA_MAX_NAME_LENGTH) { > > + > TRACE("SA_AMF_COMPONENT_NAME is too long"); > > + ais_rc = > SA_AIS_ERR_INVALID_PARAM; > > + goto done; > > + } > > + strcpy((char *)logSvcUsrName- > >value, logSvcUsrChars); > > + write_param->logSvcUsrName = > logSvcUsrName; > > + } else { > > + if (logRecord- > >logHeader.genericHdr.logSvcUsrName->length >= > > + > SA_MAX_NAME_LENGTH) { > > + > TRACE("logSvcUsrName too long"); > > + ais_rc = > SA_AIS_ERR_INVALID_PARAM; > > + goto done; > > + } > > + logSvcUsrName->length = > > + logRecord- > >logHeader.genericHdr.logSvcUsrName->length; > > + write_param->logSvcUsrName = > > + (SaNameT > *)logRecord->logHeader.genericHdr.logSvcUsrName; > > + } > > + } > > + > > + if 
(logRecord->logHdrType == SA_LOG_NTF_HEADER) { > > + if (logRecord- > >logHeader.ntfHdr.notificationObject == NULL) { > > + TRACE("notificationObject == > NULL"); > > + ais_rc = > SA_AIS_ERR_INVALID_PARAM; > > + goto done; > > + } > > + > > + if (logRecord- > >logHeader.ntfHdr.notificationObject->length >= > > + SA_MAX_NAME_LENGTH) { > > + TRACE("notificationObject.length > >= SA_MAX_NAME_LENGTH"); > > + ais_rc = > SA_AIS_ERR_INVALID_PARAM; > > + goto done; > > + } > > + > > + if (logRecord->logHeader.ntfHdr.notifyingObject > == NULL) { > > + TRACE("notifyingObject == > NULL"); > > + ais_rc = > SA_AIS_ERR_INVALID_PARAM; > > + goto done; > > + } > > + > > + if (logRecord- > >logHeader.ntfHdr.notifyingObject->length >= > > + SA_MAX_NAME_LENGTH) { > > + TRACE("notifyingObject.length >= > SA_MAX_NAME_LENGTH"); > > + ais_rc = > SA_AIS_ERR_INVALID_PARAM; > > + goto done; > > + } > > + } > > + > > +done: > > + TRACE_LEAVE(); > > + return ais_rc; > > +} > > + > > /** > > * > > * @param logStreamHandle > > @@ -788,158 +1102,125 @@ SaAisErrorT saLogWriteLogAsync(SaLogStre > > lga_log_stream_hdl_rec_t *lstr_hdl_rec; > > lga_client_hdl_rec_t *hdl_rec; > > lgsv_msg_t msg; > > - SaAisErrorT rc = SA_AIS_OK; > > + SaAisErrorT ais_rc = SA_AIS_OK; > > + lgsv_write_log_async_req_t *write_param; > > SaNameT logSvcUsrName; > > - SaTimeT logTimeStamp; > > - lgsv_write_log_async_req_t *write_param; > > + int rc; > > > > memset(&(msg), 0, sizeof(lgsv_msg_t)); > > write_param = &msg.info.api_info.param.write_log_async; > > TRACE_ENTER(); > > > > - if (NULL == logRecord) { > > - TRACE("SA_AIS_ERR_INVALID_PARAM => NULL > pointer check"); > > - rc = SA_AIS_ERR_INVALID_PARAM; > > + if ((ackFlags != 0) && (ackFlags != > SA_LOG_RECORD_WRITE_ACK)) { > > + TRACE("SA_AIS_ERR_BAD_FLAGS=> ackFlags"); > > + ais_rc = SA_AIS_ERR_BAD_FLAGS; > > goto done; > > } > > > > - if (logRecord->logHdrType == SA_LOG_GENERIC_HEADER) { > > - switch (logRecord- > >logHeader.genericHdr.logSeverity) { > > - case 
SA_LOG_SEV_EMERGENCY: > > - case SA_LOG_SEV_ALERT: > > - case SA_LOG_SEV_CRITICAL: > > - case SA_LOG_SEV_ERROR: > > - case SA_LOG_SEV_WARNING: > > - case SA_LOG_SEV_NOTICE: > > - case SA_LOG_SEV_INFO: > > - break; > > - default: > > - TRACE("Invalid severity: %x", > > logRecord->logHeader.genericHdr.logSeverity); > > - rc = > SA_AIS_ERR_INVALID_PARAM; > > - goto done; > > - } > > - } > > - > > - if (logRecord->logBuffer != NULL) { > > - if ((logRecord->logBuffer->logBuf == NULL) && > > (logRecord->logBuffer->logBufSize != 0)) { > > - TRACE("logBuf == NULL && > logBufSize != 0"); > > - rc = > SA_AIS_ERR_INVALID_PARAM; > > - goto done; > > - } > > - } > > - > > - if ((ackFlags != 0) && (ackFlags != > SA_LOG_RECORD_WRITE_ACK)) { > > - TRACE("SA_AIS_ERR_BAD_FLAGS=> ackFlags"); > > - rc = SA_AIS_ERR_BAD_FLAGS; > > + /* Validate the log record and if generic header add > > + * logSvcUsrName from environment variable > SA_AMF_COMPONENT_NAME > > + */ > > + ais_rc = handle_log_record(logRecord, &logSvcUsrName, > write_param); > > + if (ais_rc != SA_AIS_OK) { > > + TRACE("%s: Validate Log record Fail", > __FUNCTION__); > > goto done; > > } > > > > - /* Set timeStamp data if not provided by application user */ > > - if (logRecord->logTimeStamp == SA_TIME_UNKNOWN) { > > - logTimeStamp = setLogTime(); > > - write_param->logTimeStamp = &logTimeStamp; > > - } else { > > - write_param->logTimeStamp = (SaTimeT > *)&logRecord->logTimeStamp; > > - } > > - > > - /* SA_AIS_ERR_INVALID_PARAM, bullet 2 in SAI-AIS-LOG- > A.02.01 > > - Section 3.6.3, Return Values */ > > - if (logRecord->logHdrType == SA_LOG_GENERIC_HEADER) { > > - if (logRecord- > >logHeader.genericHdr.logSvcUsrName == NULL) { > > - char *logSvcUsrChars = NULL; > > - TRACE("logSvcUsrName == > NULL"); > > - logSvcUsrChars = > getenv("SA_AMF_COMPONENT_NAME"); > > - if (logSvcUsrChars == NULL) { > > - rc = > SA_AIS_ERR_INVALID_PARAM; > > - goto done; > > - } > > - logSvcUsrName.length = > strlen(logSvcUsrChars); > > - if 
(logSvcUsrName.length >= > SA_MAX_NAME_LENGTH) { > > - > TRACE("SA_AMF_COMPONENT_NAME is too long"); > > - rc = > SA_AIS_ERR_INVALID_PARAM; > > - goto done; > > - } > > - strcpy((char > *)logSvcUsrName.value, logSvcUsrChars); > > - write_param->logSvcUsrName = > &logSvcUsrName; > > - } else { > > - if (logRecord- > >logHeader.genericHdr.logSvcUsrName->length >= > > SA_MAX_NAME_LENGTH) { > > - > TRACE("logSvcUsrName too long"); > > - rc = > SA_AIS_ERR_INVALID_PARAM; > > - goto done; > > - } > > - logSvcUsrName.length = > > logRecord->logHeader.genericHdr.logSvcUsrName->length; > > - write_param->logSvcUsrName = > (SaNameT > > *)logRecord->logHeader.genericHdr.logSvcUsrName; > > - } > > - } > > - > > - if (logRecord->logHdrType == SA_LOG_NTF_HEADER) { > > - if (logRecord- > >logHeader.ntfHdr.notificationObject == NULL) { > > - TRACE("notificationObject == > NULL"); > > - rc = > SA_AIS_ERR_INVALID_PARAM; > > - goto done; > > - } > > - > > - if (logRecord- > >logHeader.ntfHdr.notificationObject->length >= > > SA_MAX_NAME_LENGTH) { > > - TRACE("notificationObject.length > >= SA_MAX_NAME_LENGTH"); > > - rc = > SA_AIS_ERR_INVALID_PARAM; > > - goto done; > > - } > > - > > - if (logRecord->logHeader.ntfHdr.notifyingObject > == NULL) { > > - TRACE("notifyingObject == > NULL"); > > - rc = > SA_AIS_ERR_INVALID_PARAM; > > - goto done; > > - } > > - > > - if (logRecord- > >logHeader.ntfHdr.notifyingObject->length >= > > SA_MAX_NAME_LENGTH) { > > - TRACE("notifyingObject.length >= > SA_MAX_NAME_LENGTH"); > > - rc = > SA_AIS_ERR_INVALID_PARAM; > > - goto done; > > - } > > - } > > - > > - /* retrieve log stream hdl record */ > > + /* Retrieve log stream hdl record > > + * From now on we must give stream before return > > + */ > > lstr_hdl_rec = ncshm_take_hdl(NCS_SERVICE_ID_LGA, > logStreamHandle); > > if (lstr_hdl_rec == NULL) { > > - TRACE("ncshm_take_hdl logStreamHandle > FAILED!"); > > - rc = SA_AIS_ERR_BAD_HANDLE; > > + TRACE("%s: ncshm_take_hdl logStreamHandle > FAILED!", > > 
+ __FUNCTION__); > > + ais_rc = SA_AIS_ERR_BAD_HANDLE; > > goto done; > > } > > > > /* SA_AIS_ERR_INVALID_PARAM, bullet 1 in SAI-AIS-LOG- > A.02.01 > > Section 3.6.3, Return Values */ > > if (lstr_hdl_rec->log_header_type != logRecord->logHdrType) > { > > - TRACE("lstr_hdl_rec->log_header_type != > logRecord->logHdrType"); > > - ncshm_give_hdl(logStreamHandle); > > - rc = SA_AIS_ERR_INVALID_PARAM; > > - goto done; > > + TRACE("%s: lstr_hdl_rec->log_header_type != > > logRecord->logHdrType", > > + __FUNCTION__); > > + ais_rc = SA_AIS_ERR_INVALID_PARAM; > > + goto done_give_hdl_stream; > > } > > > > - /* retrieve the lga client hdl record */ > > + /* retrieve the lga client hdl record > > + * From now on we must give both stream and client (parent) > handle > > + * before return > > + */ > > hdl_rec = ncshm_take_hdl(NCS_SERVICE_ID_LGA, > > lstr_hdl_rec->parent_hdl->local_hdl); > > if (hdl_rec == NULL) { > > - TRACE("ncshm_take_hdl logHandle FAILED!"); > > - ncshm_give_hdl(logStreamHandle); > > - rc = SA_AIS_ERR_LIBRARY; > > - goto done; > > + TRACE("%s: ncshm_take_hdl logHandle > FAILED!", > > + __FUNCTION__); > > + ais_rc = SA_AIS_ERR_LIBRARY; > > + goto done_give_hdl_stream; > > } > > > > if ((hdl_rec->reg_cbk.saLogWriteLogCallback == NULL) && > (ackFlags == > > SA_LOG_RECORD_WRITE_ACK)) { > > - TRACE("Write Callback not registered"); > > - ncshm_give_hdl(logStreamHandle); > > - ncshm_give_hdl(hdl_rec->local_hdl); > > - rc = SA_AIS_ERR_INIT; > > - goto done; > > + TRACE("%s: Write Callback not registered", > __FUNCTION__); > > + ais_rc = SA_AIS_ERR_INIT; > > + goto done_give_hdl_all; > > } > > > > + /*** > > + * Handle states > > + * Synchronize with mds and recovery thread (mutex) > > + */ > > + pthread_mutex_lock(&lga_cb.cb_lock); > > + lgs_state_t lgs_state = lga_cb.lgs_state; > > + lga_state_t lga_state = lga_cb.lga_state; > > + pthread_mutex_unlock(&lga_cb.cb_lock); > > > > - /* Check Whether LGS is up or not */ > > - if (!lga_cb.lgs_up) { > > - 
ncshm_give_hdl(logStreamHandle); > > - ncshm_give_hdl(hdl_rec->local_hdl); > > - TRACE("LGS down"); > > - rc = SA_AIS_ERR_TRY_AGAIN; > > - goto done; > > + if (lgs_state == LGS_NO_ACTIVE) { > > + /* We have a server but it is temporarily > unavailable. > > + * Client may try again > > + */ > > + TRACE("%s: LGS no active", __FUNCTION__); > > + ais_rc = SA_AIS_ERR_TRY_AGAIN; > > + goto done_give_hdl_all; > > + } > > + > > + if (lga_state == LGA_NO_SERVER) { > > + /* We have no server and cannot write. The > client may try again > > + */ > > + TRACE("\t LGA_NO_SERVER"); > > + ais_rc = SA_AIS_ERR_TRY_AGAIN; > > + goto done_give_hdl_all; > > + } > > + > > + if (lga_state == LGA_RECOVERY2) { > > + /* Auto recovery is ongoing. We have to wait for > it to finish. > > + * The client may try again > > + */ > > + TRACE("\t LGA_RECOVERY2"); > > + ais_rc = SA_AIS_ERR_TRY_AGAIN; > > + goto done_give_hdl_all; > > + } > > + > > + if (lga_state == LGA_RECOVERY1) { > > + /* We are in recovery 1 state. > > + * Recover client and execute the request > > + */ > > + TRACE("\t LGA_RECOVERY1"); > > + rc = recover_one_client(hdl_rec); > > + if (rc == -1) { > > + /* Client could not be recovered. > Delete client and > > + * return BAD HANDLE > > + */ > > + TRACE("\t recover_one_client > Fail"); > > + /* The log stream handle is not > released in > > + * delete_one_client() so we > have to do it here. 
> > + * The function is used in other > APIs that does not > > + * take a log stream handle > > + */ > > + > ncshm_give_hdl(logStreamHandle); > > + (void) > delete_one_client(&lga_cb.client_list, hdl_rec); > > + ais_rc = > SA_AIS_ERR_BAD_HANDLE; > > + /* Handles are destroyed so we > shall not give handles */ > > + goto done; > > + } > > } > > > > /** populate the mds message to send across to the LGS > > @@ -955,18 +1236,20 @@ SaAisErrorT saLogWriteLogAsync(SaLogStre > > /** Send the message out to the LGS > > **/ > > if (NCSCC_RC_SUCCESS != > lga_mds_msg_async_send(&lga_cb, &msg, > > MDS_SEND_PRIORITY_MEDIUM)) > > - rc = SA_AIS_ERR_TRY_AGAIN; > > + ais_rc = SA_AIS_ERR_TRY_AGAIN; > > > > - /** Give all the handles that were taken **/ > > - ncshm_give_hdl(lstr_hdl_rec->log_stream_hdl); > > + done_give_hdl_all: > > ncshm_give_hdl(hdl_rec->local_hdl); > > + done_give_hdl_stream: > > + ncshm_give_hdl(logStreamHandle); > > > > - done: > > +done: > > TRACE_LEAVE(); > > - return rc; > > + return ais_rc; > > } > > > > /** > > + * API function for closing stream > > * > > * @param logStreamHandle > > * > > @@ -977,34 +1260,85 @@ SaAisErrorT saLogStreamClose(SaLogStream > > lga_log_stream_hdl_rec_t *lstr_hdl_rec; > > lga_client_hdl_rec_t *hdl_rec; > > lgsv_msg_t msg, *o_msg = NULL; > > - SaAisErrorT rc = SA_AIS_OK; > > + SaAisErrorT ais_rc = SA_AIS_OK; > > uint32_t mds_rc; > > + int rc; > > > > TRACE_ENTER(); > > > > lstr_hdl_rec = ncshm_take_hdl(NCS_SERVICE_ID_LGA, > logStreamHandle); > > if (lstr_hdl_rec == NULL) { > > TRACE("ncshm_take_hdl logStreamHandle "); > > - rc = SA_AIS_ERR_BAD_HANDLE; > > + ais_rc = SA_AIS_ERR_BAD_HANDLE; > > goto done; > > } > > > > + /*** > > + * Handle states > > + * Synchronize with mds and recovery thread (mutex) > > + */ > > + pthread_mutex_lock(&lga_cb.cb_lock); > > + lgs_state_t lgs_state = lga_cb.lgs_state; > > + lga_state_t lga_state = lga_cb.lga_state; > > + pthread_mutex_unlock(&lga_cb.cb_lock); > > + > > + if (lgs_state == 
LGS_NO_ACTIVE) { > > + /* We have a server but it is temporarily > unavailable. Client may > > + * try again > > + */ > > + TRACE("%s LGS no active", __FUNCTION__); > > + ais_rc = SA_AIS_ERR_TRY_AGAIN; > > + goto done_give_hdl_stream; > > + } > > + > > + if (lga_state == LGA_NO_SERVER) { > > + /* We have no server and cannot write. The > client may try again > > + */ > > + TRACE("%s No server", __FUNCTION__); > > + ais_rc = SA_AIS_ERR_TRY_AGAIN; > > + goto done_give_hdl_stream; > > + } > > + > > + if (lga_state == LGA_RECOVERY2) { > > + /* Auto recovery is ongoing. We have to wait for > it to finish. > > + * The client may try again > > + */ > > + TRACE("%s LGA auto recovery ongoing (2)", > __FUNCTION__); > > + ais_rc = SA_AIS_ERR_TRY_AGAIN; > > + goto done_give_hdl_stream; > > + } > > + > > /* retrieve the client hdl record */ > > hdl_rec = ncshm_take_hdl(NCS_SERVICE_ID_LGA, > > lstr_hdl_rec->parent_hdl->local_hdl); > > if (hdl_rec == NULL) { > > TRACE("ncshm_take_hdl logHandle "); > > - rc = SA_AIS_ERR_LIBRARY; > > + ais_rc = SA_AIS_ERR_LIBRARY; > > goto done_give_hdl_stream; > > } > > > > - /* Check Whether LGS is up or not */ > > - if (!lga_cb.lgs_up) { > > - TRACE("LGS is down"); > > - rc = SA_AIS_ERR_TRY_AGAIN; > > - goto done_give_hdl_all; > > + if (lga_state == LGA_RECOVERY1) { > > + /* We are in recovery 1 state. > > + * Recover client and execute the request > > + */ > > + rc = recover_one_client(hdl_rec); > > + if (rc == -1) { > > + /* Client could not be recovered. > Delete client and > > + * return BAD HANDLE > > + */ > > + TRACE("%s Recover Fail > delete_one_client", __FUNCTION__); > > + /* The log stream handle is not > released in > > + * delete_one_client() so we > have to do it here. 
> > + * The function is used in other > APIs that does not > > + * take a log stream handle > > + */ > > + > ncshm_give_hdl(logStreamHandle); > > + (void) > delete_one_client(&lga_cb.client_list, hdl_rec); > > + ais_rc = > SA_AIS_ERR_BAD_HANDLE; > > + /* Handles are destroyed so we > shall not give handles */ > > + goto done; > > + } > > } > > > > - > > /** Populate a MDS message to send to the LGS for a channel > > * close operation. > > **/ > > @@ -1018,22 +1352,22 @@ SaAisErrorT saLogStreamClose(SaLogStream > > case NCSCC_RC_SUCCESS: > > break; > > case NCSCC_RC_REQ_TIMOUT: > > - rc = SA_AIS_ERR_TIMEOUT; > > - TRACE("lga_mds_msg_sync_send FAILED: %u", > rc); > > + ais_rc = SA_AIS_ERR_TIMEOUT; > > + TRACE("lga_mds_msg_sync_send FAILED: %s", > saf_error(ais_rc)); > > goto done_give_hdl_all; > > default: > > - TRACE("lga_mds_msg_sync_send FAILED: %u", > rc); > > - rc = SA_AIS_ERR_NO_RESOURCES; > > + TRACE("lga_mds_msg_sync_send FAILED: %s", > saf_error(ais_rc)); > > + ais_rc = SA_AIS_ERR_NO_RESOURCES; > > goto done_give_hdl_all; > > } > > > > if (o_msg != NULL) { > > - rc = o_msg->info.api_resp_info.rc; > > + ais_rc = o_msg->info.api_resp_info.rc; > > lga_msg_destroy(o_msg); > > } else > > - rc = SA_AIS_ERR_NO_RESOURCES; > > + ais_rc = SA_AIS_ERR_NO_RESOURCES; > > > > - if (rc == SA_AIS_OK) { > > + if (ais_rc == SA_AIS_OK) { > > pthread_mutex_lock(&lga_cb.cb_lock); > > > > /** Delete this log stream & the associated resources with this > > @@ -1041,7 +1375,7 @@ SaAisErrorT saLogStreamClose(SaLogStream > > **/ > > if (NCSCC_RC_SUCCESS != > > lga_log_stream_hdl_rec_del(&hdl_rec->stream_list, lstr_hdl_rec)) { > > TRACE("Unable to delete log > stream"); > > - rc = SA_AIS_ERR_LIBRARY; > > + ais_rc = SA_AIS_ERR_LIBRARY; > > } > > > > pthread_mutex_unlock(&lga_cb.cb_lock); > > @@ -1054,7 +1388,7 @@ SaAisErrorT saLogStreamClose(SaLogStream > > > > done: > > TRACE_LEAVE(); > > - return rc; > > + return ais_rc; > > } > > > > /** > > diff --git 
a/osaf/libs/agents/saf/lga/lga_mds.c > > b/osaf/libs/agents/saf/lga/lga_mds.c > > --- a/osaf/libs/agents/saf/lga/lga_mds.c > > +++ b/osaf/libs/agents/saf/lga/lga_mds.c > > @@ -17,6 +17,7 @@ > > > > #include <stdlib.h> > > #include "lga.h" > > +#include "lga_state.h" > > > > static MDS_CLIENT_MSG_FORMAT_VER > > > LGA_WRT_LGS_MSG_FMT_ARRAY[LGA_WRT_LGS_SUBPART_VER_RANGE] > = { > > @@ -478,38 +479,56 @@ static uint32_t lga_lgs_msg_proc(lga_cb_ > > > > > ********************************************************** > ********************/ > > static uint32_t lga_mds_svc_evt(struct ncsmds_callback_info > > *mds_cb_info) > > { > > - TRACE_2("LGA Rcvd MDS subscribe evt from svc %d \n", > > mds_cb_info->info.svc_evt.i_svc_id); > > + TRACE_ENTER(); > > > > switch (mds_cb_info->info.svc_evt.i_change) { > > case NCSMDS_NO_ACTIVE: > > + TRACE("%s\t NCSMDS_NO_ACTIVE", > __FUNCTION__); > > + /* This is a temporary server down e.g. during > switch/fail over*/ > > + if (mds_cb_info->info.svc_evt.i_svc_id == > NCSMDS_SVC_ID_LGS) { > > + > pthread_mutex_lock(&lga_cb.cb_lock); > > + TRACE("NCSMDS_NO_ACTIVE"); > > + > > + memset(&lga_cb.lgs_mds_dest, > 0, sizeof(MDS_DEST)); > > + lga_cb.lgs_state = > LGS_NO_ACTIVE; > > + > pthread_mutex_unlock(&lga_cb.cb_lock); > > + } > > + break; > > case NCSMDS_DOWN: > > + TRACE("%s\t NCSMDS_DOWN", > __FUNCTION__); > > + /* This may be a loss of server where all client > and stream > > information > > + * is lost on server side. 
In this situation client > info in agent > > is > > + * no longer valid and clients must register again > (initialize) > > + */ > > if (mds_cb_info->info.svc_evt.i_svc_id == > NCSMDS_SVC_ID_LGS) { > > - /** TBD what to do if LGS goes down > > - ** Hold on to the subscription if possible > > - ** to send them out if LGS comes back up > > - **/ > > + > pthread_mutex_lock(&lga_cb.cb_lock); > > TRACE("LGS down"); > > - > pthread_mutex_lock(&lga_cb.cb_lock); > > memset(&lga_cb.lgs_mds_dest, > 0, sizeof(MDS_DEST)); > > - lga_cb.lgs_up = 0; > > + lga_cb.lgs_state = LGS_DOWN; > > > pthread_mutex_unlock(&lga_cb.cb_lock); > > + > > + /* The log server is lost */ > > + lga_no_server_state_set(); > > } > > break; > > case NCSMDS_NEW_ACTIVE: > > case NCSMDS_UP: > > switch (mds_cb_info->info.svc_evt.i_svc_id) { > > case NCSMDS_SVC_ID_LGS: > > + TRACE("%s\t NCSMDS_UP" , > __FUNCTION__); > > /** Store the MDS DEST of the LGS > > **/ > > - TRACE_2("MSG from LGS > NCSMDS_NEW_ACTIVE/UP"); > > > pthread_mutex_lock(&lga_cb.cb_lock); > > lga_cb.lgs_mds_dest = > mds_cb_info->info.svc_evt.i_dest; > > - lga_cb.lgs_up = 1; > > + lga_cb.lgs_state = LGS_UP; > > if (lga_cb.lgs_sync_awaited) { > > /* signal waiting > thread */ > > > m_NCS_SEL_OBJ_IND(&lga_cb.lgs_sync_sel); > > } > > > pthread_mutex_unlock(&lga_cb.cb_lock); > > + > > + /* The log server is up */ > > + lga_serv_recov1state_set(); > > break; > > default: > > break; > > @@ -519,6 +538,7 @@ static uint32_t lga_mds_svc_evt(struct n > > break; > > } > > > > + TRACE_LEAVE(); > > return NCSCC_RC_SUCCESS; > > } > > > > @@ -1141,8 +1161,9 @@ uint32_t lga_mds_msg_sync_send(lga_cb_t > > /* Retrieve the response and take ownership of > the memory */ > > *o_msg = (lgsv_msg_t > *)mds_info.info.svc_send.info.sndrsp.o_rsp; > > mds_info.info.svc_send.info.sndrsp.o_rsp = > NULL; > > - } else > > + } else { > > TRACE("lga_mds_msg_sync_send FAILED: %u", > rc); > > + } > > > > TRACE_LEAVE(); > > return rc; > > @@ -1184,8 +1205,9 @@ uint32_t 
lga_mds_msg_async_send(lga_cb_t > > > > /* send the message */ > > rc = ncsmds_api(&mds_info); > > - if (rc != NCSCC_RC_SUCCESS) > > - TRACE("failed"); > > + if (rc != NCSCC_RC_SUCCESS) { > > + TRACE("%s: async send failed", > __FUNCTION__); > > + } > > > > TRACE_LEAVE(); > > return rc; > > diff --git a/osaf/libs/agents/saf/lga/lga_state.c > > b/osaf/libs/agents/saf/lga/lga_state.c > > new file mode 100644 > > --- /dev/null > > +++ b/osaf/libs/agents/saf/lga/lga_state.c > > @@ -0,0 +1,670 @@ > > +/* -*- OpenSAF -*- > > + * > > + * (C) Copyright 2016 The OpenSAF Foundation > > + * > > + * This program is distributed in the hope that it will be useful, > > but > > + * WITHOUT ANY WARRANTY; without even the implied warranty of > > MERCHANTABILITY > > + * or FITNESS FOR A PARTICULAR PURPOSE. This file and program are > > licensed > > + * under the GNU Lesser General Public License Version 2.1, February > > 1999. > > + * The complete license can be accessed from the following location: > > + * http://opensource.org/licenses/lgpl-license.php > > + * See the Copying file included with the OpenSAF distribution for > > full > > + * licensing terms. 
> > + * > > + * Author(s): Ericsson AB > > + * > > + */ > > + > > +#include "lga_state.h" > > +#include <stdlib.h> > > +#include <syslog.h> > > +#include <pthread.h> > > +#include <osaf_poll.h> > > +#include <ncs_osprm.h> > > +#include <osaf_time.h> > > +#include <saf_error.h> > > + > > +/*** > > + * Common data > > + */ > > + > > +/* Selection object for terminating recovery thread for state 2 > > (after timeout) > > + * NOTE: Only for recovery2_thread > > + */ > > +static NCS_SEL_OBJ state2_terminate_sel_obj; > > + > > +static pthread_t recovery2_thread_id = 0; > > +static pthread_mutex_t lga_recov_mutex = > PTHREAD_MUTEX_INITIALIZER; > > + > > > +/********************************************************* > ********************* > > + * Functions used with server down recovery handling > > + > > > ********************************************************** > ********************/ > > + > > +static int start_recovery2_thread(void); > > + > > > +/********************************************************* > ********************* > > + * Functions for sending messages to the server used in the recovery > > functions > > + * Handles TRY AGAIN > > + > > > ********************************************************** > ********************/ > > + > > +/** > > + * Send an initialize message to the server. 
A number of retries is > > done if > > + * return code is TRY AGAIN > > + * The server returns a client id > > + * > > + * @param client_id[out] > > + * @return -1 on error (client id not valid) > > + */ > > +static int send_initialize_msg(uint32_t *client_id) > > +{ > > + SaVersionT version = { > > + .releaseCode = LOG_RELEASE_CODE, > > + .majorVersion = LOG_MAJOR_VERSION, > > + .minorVersion = LOG_MINOR_VERSION > > + }; > > + > > + SaAisErrorT ais_rc = SA_AIS_ERR_TRY_AGAIN; > > + uint32_t ncs_rc = NCSCC_RC_SUCCESS; > > + int rc = 0; > > + lgsv_msg_t i_msg, *o_msg = NULL; > > + uint32_t try_again_cnt = 10; > > + const uint32_t sleep_delay_ms = 100; > > + > > + TRACE_ENTER(); > > + > > + /* Populate the message to be sent to the LGS */ > > + memset(&i_msg, 0, sizeof(lgsv_msg_t)); > > + i_msg.type = LGSV_LGA_API_MSG; > > + i_msg.info.api_info.type = LGSV_INITIALIZE_REQ; > > + i_msg.info.api_info.param.init.version = version; > > + > > + /* Send a message to LGS to obtain a client_id > > + */ > > + while(try_again_cnt > 0) { > > + ncs_rc = lga_mds_msg_sync_send(&lga_cb, > &i_msg, &o_msg, > > + > LGS_WAIT_TIME,MDS_SEND_PRIORITY_HIGH); > > + if (ncs_rc != NCSCC_RC_SUCCESS) { > > + LOG_NO("%s > lga_mds_msg_sync_send() Fail %d", > > + __FUNCTION__, > ncs_rc); > > + rc = -1; > > + goto done; > > + } > > + > > + ais_rc = o_msg->info.api_resp_info.rc; > > + if (ais_rc == SA_AIS_ERR_TRY_AGAIN) { > > + usleep(sleep_delay_ms * 1000); > > + } else { > > + break; > > + } > > + try_again_cnt--; > > + } > > + if (SA_AIS_OK != ais_rc) { > > + TRACE("%s LGS error response %s", > __FUNCTION__, > > saf_error(ais_rc)); > > + rc = -1; > > + goto free_done; > > + } > > + > > + /* Initialize succeeded */ > > + *client_id = o_msg- > >info.api_resp_info.param.init_rsp.client_id; > > + > > +free_done: > > + /* Free up the response message */ > > + lga_msg_destroy(o_msg); > > + > > +done: > > + TRACE_LEAVE2("rc = %d", rc); > > + return rc; > > +} > > + > > +/** > > + * Send an open message to 
the server. A number of retries is done > > if > > + * return code is TRY AGAIN > > + * The server returns client id and a stream id > > + * > > + * @param lstream_id[out] Log stream id received from server > > + * @param p_stream[in] Pointer to a stream record > > + * @return -1 on error (stream id not valid) > > + */ > > +static int send_stream_open_msg(uint32_t *lstream_id, > > + lga_log_stream_hdl_rec_t *p_stream) > > +{ > > + SaAisErrorT ais_rc = SA_AIS_OK; > > + uint32_t ncs_rc = NCSCC_RC_SUCCESS; > > + int rc = 0; > > + lgsv_msg_t i_msg, *o_msg; > > + lgsv_stream_open_req_t *open_param; > > + lga_client_hdl_rec_t *p_client = p_stream->parent_hdl; > > + uint32_t try_again_cnt = 10; > > + const uint32_t sleep_delay_ms = 100; > > + > > + TRACE_ENTER(); > > + TRACE("\t log_stream_name \"%s\", lgs_client_id=%d", > > + p_stream->log_stream_name.value, p_client- > >lgs_client_id); > > + > > + /* Populate a stream open message to the LGS > > + */ > > + memset(&i_msg, 0, sizeof(lgsv_msg_t)); > > + open_param = &i_msg.info.api_info.param.lstr_open_sync; > > + > > + /* Set the open parameters to open a stream for recovery */ > > + open_param->client_id = p_client->lgs_client_id; > > + open_param->lstr_name = p_stream->log_stream_name; > > + open_param->logFileFmt = NULL; > > + open_param->logFileFmtLength = 0; > > + open_param->maxLogFileSize = 0; > > + open_param->maxLogRecordSize = 0; > > + open_param->haProperty = SA_FALSE; > > + open_param->logFileFullAction = 0; > > + open_param->maxFilesRotated = 0; > > + open_param->lstr_open_flags = 0; > > + > > + i_msg.type = LGSV_LGA_API_MSG; > > + i_msg.info.api_info.type = LGSV_STREAM_OPEN_REQ; > > + > > + /* Send a message to LGS to obtain a stream_id > > + */ > > + while(try_again_cnt > 0) { > > + ncs_rc = lga_mds_msg_sync_send(&lga_cb, > &i_msg, &o_msg, > > + LGS_WAIT_TIME, > MDS_SEND_PRIORITY_HIGH); > > + if (ncs_rc != NCSCC_RC_SUCCESS) { > > + rc = -1; > > + TRACE("%s > lga_mds_msg_sync_send() Fail %d", __FUNCTION__, > 
> ncs_rc); > > + goto done; > > + } > > + > > + ais_rc = o_msg->info.api_resp_info.rc; > > + if (ais_rc == SA_AIS_ERR_TRY_AGAIN) { > > + usleep(sleep_delay_ms * 1000); > > + } else { > > + break; > > + } > > + try_again_cnt--; > > + } > > + if (SA_AIS_OK != ais_rc) { > > + TRACE("%s LGS error response %s", > __FUNCTION__, > > saf_error(ais_rc)); > > + rc = -1; > > + goto free_done; > > + } > > + > > + /* Open succeeded */ > > + *lstream_id = > > o_msg->info.api_resp_info.param.lstr_open_rsp.lstr_id; > > + > > +free_done: > > + /* Free up the response message */ > > + lga_msg_destroy(o_msg); > > + > > +done: > > + TRACE_LEAVE2("rc = %d", rc); > > + return rc; > > +} > > + > > > +/********************************************************* > ********************* > > + * Recovery functions > > + > > > ********************************************************** > ********************/ > > + > > +/** > > + * Register an existing client with the server > > + * - Send an initialize request to the server > > + * - A new client id is received replacing the old no longer valid > > one > > + * > > + * This function shall only be used in recovery. 
It is assumed that > > + * lga_serv_downstate_set() has been called > > + * > > + * @param p_client[in] Pointer to a client record > > + * @return -1 on error > > + * 0 client is successfully initialized > > + * 1 client was already initialized > > + */ > > +static int initialize_one_client(lga_client_hdl_rec_t *p_client) > > +{ > > + int rc = 0; > > + uint32_t client_id; > > + > > + TRACE_ENTER(); > > + > > + if (p_client->initialized_flag == true) { > > + /* The client is already initialized */ > > + rc = 1; > > + goto done; > > + } > > + > > + rc = send_initialize_msg(&client_id); > > + if (rc == -1) { > > + TRACE("%s initialize_msg_send Fail", > __FUNCTION__); > > + } > > + > > + /* Restore the client Id with the one returned by the LGS and > > + * set the initialized flag > > + */ > > + osaf_mutex_lock_ordie(&lga_cb.cb_lock); > > + p_client->lgs_client_id = client_id; > > + p_client->initialized_flag = true; > > + osaf_mutex_unlock_ordie(&lga_cb.cb_lock); > > + > > +done: > > + TRACE_LEAVE2("rc = %d", rc); > > + return rc; > > +} > > + > > +/** > > + * Register a stream that was opened by the client with the server > > + * - Send a stream open request to the server (no parameter list) > > + * - A new stream id is received replacing the old no longer valid > > one > > + * > > + * @param p_stream[in] Pointer to a stream record > > + * @return -1 on error > > + * 0 The stream was successfully recovered > > + * 1 The stream was already recovered > > + */ > > +static int recover_one_stream(lga_log_stream_hdl_rec_t *p_stream) > > +{ > > + int rc = 0; > > + uint32_t stream_id; > > + > > + TRACE_ENTER(); > > + > > + if (p_stream->recovered_flag == true) { > > + /* The stream is already recovered */ > > + rc = 1; > > + goto done; > > + } > > + > > + rc = send_stream_open_msg(&stream_id, p_stream); > > + if (rc == -1) { > > + TRACE("%s open_stream_msg_send Fail", > __FUNCTION__); > > + goto done; > > + } > > + > > + /* Restore the stream Id with the Id returned by the 
LGS and > > + * set the recovered flag > > + */ > > + osaf_mutex_lock_ordie(&lga_cb.cb_lock); > > + p_stream->lgs_log_stream_id = stream_id; > > + p_stream->recovered_flag = true; > > + osaf_mutex_unlock_ordie(&lga_cb.cb_lock); > > + > > +done: > > + TRACE_LEAVE2("rc = %d", rc); > > + return rc; > > +} > > + > > > +/********************************************************* > ********************* > > + * Recovery 2 thread handling functions > > + > > > ********************************************************** > ********************/ > > + > > +/** > > + * Thread for handling recovery step 2 > > + * Wait for timeout or stop request > > + * After timeout, recover all not already recovered clients > > + * > > + * @param int timeout_time_in Timeout time in s > > + * @return NULL > > + */ > > +static void *recovery2_thread(void *dummy) > > +{ > > + int rc = 0; > > + struct timespec seed_ts; > > + int timeout_ms; > > + > > + TRACE_ENTER(); > > + > > + /* Create a random timeout time in msec within an interval > > + */ > > + /* Set seed. Use nanoseconds from clock */ > > + osaf_clock_gettime(CLOCK_MONOTONIC, &seed_ts); > > + srandom((unsigned int) seed_ts.tv_nsec); > > + /* Interval 400 - 500 sec */ > > + timeout_ms = (int) (random() % 100 + 400) * 1000; > > + > > + /* Wait for timeout or a signal to terminate > > + */ > > + rc = osaf_poll_one_fd(state2_terminate_sel_obj.rmv_obj, > > timeout_ms); > > + if (rc == -1) { > > + TRACE("%s osaf_poll_one_fd Fail %s", > __FUNCTION__, > > strerror(errno)); > > + goto done; > > + } > > + > > + if (rc == 0) { > > + /* Timeout; Set recovery state 2 */ > > + osaf_mutex_lock_ordie(&lga_cb.cb_lock); > > + lga_cb.lga_state = LGA_RECOVERY2; > > + osaf_mutex_unlock_ordie(&lga_cb.cb_lock); > > + TRACE("%s Poll timeout. 
Enter LGA_RECOVERY2 > state", __FUNCTION__); > > + } else { > > + /* Stop signal received */ > > + TRACE("%s Stop signal received", > __FUNCTION__); > > + goto done; > > + } > > + > > + /* Execute recovery 2 > > + * Recover one client at the time > > + * If fail to recover remove the client. This means that the > client > > + * handle is invalidated > > + * When all clients are recovered or if exit is requested exit the > > + * thread > > + */ > > + TRACE("%s Execute recovery 2", __FUNCTION__); > > + /* First client */ > > + lga_client_hdl_rec_t *p_client; > > + p_client = lga_cb.client_list; > > + > > + while (p_client != NULL) { > > + /* Exit if requested to */ > > + rc = > osaf_poll_one_fd(state2_terminate_sel_obj.rmv_obj, 0); > > + if (rc > 0) { > > + /* We have been signaled to exit > */ > > + goto done; > > + } > > + /* Recover clients one at a time */ > > + rc = recover_one_client(p_client); > > + TRACE("\t Client %d is recovered", p_client- > >lgs_client_id); > > + if (rc == -1) { > > + TRACE("%s recover_one_client > Fail Deleting cllient (id %d)", > > + __FUNCTION__, > p_client->lgs_client_id); > > + /* Fail to recover this client > > + * Remove (handle invalidated) > > + */ > > + > osaf_mutex_lock_ordie(&lga_cb.cb_lock); > > + (void) > delete_one_client(&lga_cb.client_list, p_client); > > + > osaf_mutex_unlock_ordie(&lga_cb.cb_lock); > > + } > > + > > + /* Next client */ > > + p_client = p_client->next; > > + } > > + > > + /* All clients are recovered (or removed). 
> > + * Or recovery is aborted > > + * Change to not recovering state > > + * LGA_NORMAL > > + */ > > + osaf_mutex_lock_ordie(&lga_cb.cb_lock); > > + lga_cb.lga_state = LGA_NORMAL; > > + TRACE("\t Setting lga_state = LGA_NORMAL"); > > + osaf_mutex_unlock_ordie(&lga_cb.cb_lock); > > + > > +done: > > + /* Cleanup and Exit thread */ > > + (void) ncs_sel_obj_destroy(&state2_terminate_sel_obj); > > + > > + TRACE_LEAVE(); > > + return NULL; > > +} > > + > > +/** > > + * Setup and start the thread for recovery state 2 > > + * > > + * @return -1 on error > > + */ > > +static int start_recovery2_thread(void) > > +{ > > + int rc = 0; > > + uint32_t ncs_rc = NCSCC_RC_SUCCESS; > > + pthread_attr_t attr; > > + > > + TRACE_ENTER(); > > + > > + pthread_attr_init(&attr); > > + pthread_attr_setdetachstate(&attr, > PTHREAD_CREATE_DETACHED); > > + > > + /* Create a selection object for signaling the recovery2 thread > */ > > + ncs_rc = ncs_sel_obj_create(&state2_terminate_sel_obj); > > + if (ncs_rc != NCSCC_RC_SUCCESS) { > > + TRACE("%s ncs_sel_obj_create Fail", > __FUNCTION__); > > + rc = -1; > > + goto done; > > + } > > + > > + /* Create the thread > > + */ > > + if (pthread_create(&recovery2_thread_id, &attr, > recovery2_thread, > > NULL) != 0) { > > + /* pthread_create() error handling */ > > + TRACE("\t pthread_create FAILED: %s", > strerror(errno)); > > + rc = -1; > > + (void) > ncs_sel_obj_destroy(&state2_terminate_sel_obj); > > + goto done; > > + } > > + pthread_attr_destroy(&attr); > > + > > +done: > > + TRACE_LEAVE2("rc = %d", rc); > > + return rc; > > +} > > + > > +/** > > + * Stops the recovery 2 thread > > + * It is safe to call this function also if the thread is not > > running > > + */ > > +static void stop_recovery2_thread(void) > > +{ > > + uint32_t ncs_rc = 0; > > + > > + TRACE_ENTER(); > > + > > + /* Check if the thread is running */ > > + osaf_mutex_lock_ordie(&lga_cb.cb_lock); > > + lga_state_t lga_state = lga_cb.lga_state; > > + 
osaf_mutex_unlock_ordie(&lga_cb.cb_lock); > > + > > + if (lga_state == LGA_NORMAL) { > > + /* No thread to stop */ > > + TRACE("%s LGA_NORMAL no thread to stop", > __FUNCTION__); > > + goto done; > > + } > > + > > + /* Signal the thread to stop */ > > + ncs_rc = ncs_sel_obj_ind(&state2_terminate_sel_obj); > > + if (ncs_rc != NCSCC_RC_SUCCESS) { > > + TRACE("%s ncs_sel_obj_ind Fail", > __FUNCTION__); > > + } > > + > > + done: > > + TRACE_LEAVE(); > > + return; > > +} > > + > > > +/********************************************************* > ********************* > > + * Recovery state handling functions > > + > > > ********************************************************** > ********************/ > > + > > > +/********************************************************* > ********************* > > + * Server Down > > + * > > + * Initiate recovery handling and set LGA_NO_SERVER state > > + * LGA_NO_SERVER: State set in MDS event handler when server down > > event > > + * lga_no_server_state_set() > > + > > > ********************************************************** > ********************/ > > + > > +/** > > + * Recovery state LGA_NO_SERVER > > + * > > + * Setup server down state > > + * - Mark all clients and their streams as not recovered > > + * - Remove recovery thread if exist > > + * - Set LGA_NO_SERVER state > > + * > > + */ > > +void lga_no_server_state_set(void) > > +{ > > + lga_client_hdl_rec_t *p_client = lga_cb.client_list; > > + lga_log_stream_hdl_rec_t *p_stream; > > + > > + TRACE_ENTER(); > > + > > + /* Stop recovery thread for state 2 if exist */ > > + stop_recovery2_thread(); > > + > > + /* Set LGA_NO_SERVER state */ > > + osaf_mutex_lock_ordie(&lga_cb.cb_lock); > > + lga_cb.lga_state = LGA_NO_SERVER; > > + TRACE("\t lga_state = LGA_NO_SERVER"); > > + osaf_mutex_unlock_ordie(&lga_cb.cb_lock); > > + > > + while (p_client != NULL) { > > + /* Set Client flags for all clients */ > > + p_client->initialized_flag = false; > > + p_client->recovered_flag = 
false; > > + > > + /* Set stream flags for all streams in every client > */ > > + p_stream = p_client->stream_list; > > + while (p_stream != NULL) { > > + p_stream->recovered_flag = > false; > > + > > + p_stream = p_stream->next; > > + } > > + > > + p_client = p_client->next; > > + } > > + > > + TRACE_LEAVE(); > > +} > > + > > > +/********************************************************* > ********************* > > + * Recovery state 1 > > + * > > + * Recover when needed. This means that when a client requests a > > service > > + * the client and all its open streams is recovered in server. > > + * Handler functions for this can be found in this section. > > + * LGA_RECOVERY1: State set in MDS event handler when server up > > event > > + * lga_serv_recov1state_set(). Starting recovery > > thread > > + * > > + * LGA_RECOVERY2: State set in recovery thread after timeout > > + * lga_serv_recov2state_set() > > + * > > + * LGA_NORMAL: State set when all clients are recovered. This is > > done in > > + * recovery thread when done. After setting this state > > the > > + * thread will exit > > + > > > ********************************************************** > ********************/ > > + > > +/** > > + * If previous state was LGA_NO_SERVER (headless) > > + * Start the recovery2_thread. This will start timeout timer for > > recovery 2 > > + * state. > > + * Set state to LGA_RECOVERY1 > > + */ > > +void lga_serv_recov1state_set(void) > > +{ > > + TRACE_ENTER(); > > + > > + osaf_mutex_lock_ordie(&lga_cb.cb_lock); > > + if (lga_cb.lga_state != LGA_NO_SERVER) { > > + /* We have not been headless. 
No recovery > shall be done */ > > + TRACE("%s Previous state was not > LGA_NO_SERVER lga_stat = %d", > > + __FUNCTION__, > lga_cb.lga_state); > > + osaf_mutex_unlock_ordie(&lga_cb.cb_lock); > > + goto done; > > + } else { > > + lga_cb.lga_state = LGA_RECOVERY1; > > + TRACE("lga_state = %d (2->RECOVERY1)", > > + lga_cb.lga_state); > > + osaf_mutex_unlock_ordie(&lga_cb.cb_lock); > > + } > > + > > + start_recovery2_thread(); > > + > > +done: > > + TRACE_LEAVE(); > > + return; > > +} > > + > > +/** > > + * Recover the client and all streams in its stream list > > + * - Send an initialize request to the server > > + * - Send a stream open request with no parameters to the server > > + * for each stream. > > + * - If success mark the stream and client as recovered > > + * - If fail to initiate the client or recover one of its streams > > the > > + * client shall be finalized. A finalize request is sent to the > > server. > > + * If error return code from server it is ignored. This may happen > > e.g. if > > + * the client was never initialized. > > + * > > + * The reason for finalizing the whole client if only one stream > > cannot be > > + * recovered is that the client will get BAD_HANDLE when requesting a > > service > > + * for that stream. This may cause the client to initialize a new > > client and > > + * the old client will remain as a resource leak. > > + * > > + * @param p_client[in] Pointer to a client record > > + * @return -1 on error > > + */ > > +int recover_one_client(lga_client_hdl_rec_t *p_client) > > +{ > > + int rc = 0; > > + lga_log_stream_hdl_rec_t *p_stream; > > + > > + TRACE_ENTER(); > > + > > + /* This function may be called both in the recovery thread and > in > > the > > + * client thread. A possible scenario is that the recovery 2 > thread > > + * times out and calls this function in recovery mode 2 while > there > > + * a client recovery is still ongoing started in mode 1. 
The > > recovery 2 > > + * thread must wait until ongoing recovery is done > > + */ > > + osaf_mutex_lock_ordie(&lga_recov_mutex); > > + /* We may have been waiting at mutex while the client was > recovered > > + * so it may already have been recovered. > > + */ > > + if (p_client->recovered_flag == true) { > > + /* Client is already recovered */ > > + TRACE("\t Already recovered"); > > + goto done; > > + } > > + > > + rc = initialize_one_client(p_client); > > + if (rc == -1) { > > + TRACE("%s initialize_one_client() Fail client Id > %d", > > + __FUNCTION__, p_client- > >lgs_client_id); > > + goto done; > > + } > > + > > + /* Recover all streams registered with this client */ > > + p_stream = p_client->stream_list; > > + while (p_stream != NULL) { > > + TRACE("\t Recover client=%d, stream=%d", > > + p_client->lgs_client_id, p_stream- > >lgs_log_stream_id); > > + rc = recover_one_stream(p_stream); > > + if (rc == -1) { > > + TRACE("%s > recover_one_stream() Fail " > > + "client Id %d stream Id %d", > __FUNCTION__, > > + p_client- > >lgs_client_id, > > + p_stream- > >lgs_log_stream_id); > > + goto done; > > + } > > + > > + p_stream = p_stream->next; > > + } > > + > > + osaf_mutex_lock_ordie(&lga_cb.cb_lock); > > + p_client->recovered_flag = true; > > + osaf_mutex_unlock_ordie(&lga_cb.cb_lock); > > + > > +done: > > + osaf_mutex_unlock_ordie(&lga_recov_mutex); > > + > > + TRACE_LEAVE(); > > + return rc; > > +} > > + > > +/** > > + * Delete one client > > + * Wrapper for function lga_hdl_rec_del() is used (lga_util.c) > > + * This wrapper adds a control to make sure that the function cannot > > + * be used by the recovery 2 thread and the client thread at the same > > time > > + * > > + * @param list_head > > + * @param rm_node > > + */ > > +uint32_t delete_one_client( > > + lga_client_hdl_rec_t **list_head, > > + lga_client_hdl_rec_t *rm_node > > + ) > > +{ > > + TRACE_ENTER2(); > > + uint32_t ncs_rc; > > + > > + osaf_mutex_lock_ordie(&lga_recov_mutex); > > + ncs_rc = 
lga_hdl_rec_del(list_head, rm_node); > > + osaf_mutex_unlock_ordie(&lga_recov_mutex); > > + > > + TRACE_LEAVE(); > > + return ncs_rc; > > +} > > diff --git a/osaf/libs/agents/saf/lga/lga_state.h > > b/osaf/libs/agents/saf/lga/lga_state.h > > new file mode 100644 > > --- /dev/null > > +++ b/osaf/libs/agents/saf/lga/lga_state.h > > @@ -0,0 +1,41 @@ > > +/* -*- OpenSAF -*- > > + * > > + * (C) Copyright 2016 The OpenSAF Foundation > > + * > > + * This program is distributed in the hope that it will be useful, > > but > > + * WITHOUT ANY WARRANTY; without even the implied warranty of > > MERCHANTABILITY > > + * or FITNESS FOR A PARTICULAR PURPOSE. This file and program are > > licensed > > + * under the GNU Lesser General Public License Version 2.1, February > > 1999. > > + * The complete license can be accessed from the following location: > > + * http://opensource.org/licenses/lgpl-license.php > > + * See the Copying file included with the OpenSAF distribution for > > full > > + * licensing terms. 
> > + * > > + * Author(s): Ericsson AB > > + * > > + */ > > + > > +#ifndef LGA_STATE_H > > +#define LGA_STATE_H > > + > > +#ifdef __cplusplus > > +extern "C" { > > +#endif > > + > > +#include "lga.h" > > + > > +/* Recovery functions */ > > +void lga_no_server_state_set(void); > > +void lga_serv_recov1state_set(void); > > +int recover_one_client(lga_client_hdl_rec_t *p_client); > > +uint32_t delete_one_client( > > + lga_client_hdl_rec_t **list_head, > > + lga_client_hdl_rec_t *rm_node > > + ); > > + > > +#ifdef __cplusplus > > +} > > +#endif > > + > > +#endif /* LGA_STATE_H */ > > + > > diff --git a/osaf/libs/agents/saf/lga/lga_util.c > > b/osaf/libs/agents/saf/lga/lga_util.c > > --- a/osaf/libs/agents/saf/lga/lga_util.c > > +++ b/osaf/libs/agents/saf/lga/lga_util.c > > @@ -44,7 +44,11 @@ static unsigned int lga_create(void) > > } > > > > /* Block and wait for indication from MDS meaning LGS is up */ > > - > osaf_poll_one_fd(m_GET_FD_FROM_SEL_OBJ(lga_cb.lgs_syn > c_sel), > > 30000); > > + > > + /* #1179 Change timeout from 30 sec (30000) to 10 sec (10000) > > + * 30 sec is probably too long for a synchronous API function > > + */ > > + > osaf_poll_one_fd(m_GET_FD_FROM_SEL_OBJ(lga_cb.lgs_syn > c_sel), > > 10000); > > > > pthread_mutex_lock(&lga_cb.cb_lock); > > lga_cb.lgs_sync_awaited = 0; > > @@ -118,12 +122,16 @@ static bool lga_clear_mbx(NCSCONTEXT arg > > static void lga_log_stream_hdl_rec_list_del(lga_log_stream_hdl_rec_t > > **plstr_hdl) > > { > > lga_log_stream_hdl_rec_t *lstr_hdl; > > + TRACE_ENTER(); > > while ((lstr_hdl = *plstr_hdl) != NULL) { > > *plstr_hdl = lstr_hdl->next; > > + TRACE("%s stream \"%s\", hdl = > %d",__FUNCTION__, > > + lstr_hdl- > >log_stream_name.value, lstr_hdl->log_stream_hdl); > > ncshm_destroy_hdl(NCS_SERVICE_ID_LGA, > lstr_hdl->log_stream_hdl); > > free(lstr_hdl); > > lstr_hdl = NULL; > > } > > + TRACE_LEAVE(); > > } > > > > > > > /********************************************************** > ****************** > > @@ -264,11 
+272,13 @@ static uint32_t lga_hdl_cbk_dispatch_blo > > } > > > > /** > > - * > > + * Initiate the agent when first used. > > + * Start NCS service > > + * Register with MDS > > * > > * @return unsigned int > > */ > > -unsigned int lga_startup(void) > > +unsigned int lga_startup(lga_cb_t *cb) > > { > > unsigned int rc = NCSCC_RC_SUCCESS; > > pthread_mutex_lock(&lga_lock); > > @@ -287,8 +297,14 @@ unsigned int lga_startup(void) > > if ((rc = lga_create()) != NCSCC_RC_SUCCESS) { > > ncs_agents_shutdown(); > > goto done; > > - } else > > + } else { > > lga_use_count = 1; > > + } > > + > > + /* Agent has successfully been started including > communication > > + * with server > > + */ > > + cb->lga_state = LGA_NORMAL; > > } > > > > done: > > @@ -299,11 +315,18 @@ unsigned int lga_startup(void) > > } > > > > /** > > + * If called when only one (the last) client for this agent the > > client list a > > + * complete 'shut down' of the agent is done. > > + * - Erase the clients list. Frees all memory including list of > > open > > + * streams. > > + * - Unregister with MDS > > + * - Shut down ncs agents > > * > > + * Global lga_use_count Contains number of registered clients > > * > > - * @return unsigned int > > + * @return unsigned int (always NCSCC_RC_SUCCESS) > > */ > > -unsigned int lga_shutdown(void) > > +unsigned int lga_shutdown_after_last_client(void) > > { > > unsigned int rc = NCSCC_RC_SUCCESS; > > > > @@ -325,6 +348,37 @@ unsigned int lga_shutdown(void) > > return rc; > > } > > > > +/** > > + * Makes a forced shut down of the agent if there are registered > > clients > > + * Makes all handles invalid and frees all resources (memory, mds) > > + * > > + * clients and the log server is down and no other recovery is > > possible. > > + * - Erase the clients list. Frees all memory including list of > > open > > + * streams. 
> > + * - Unregister with MDS > > + * - Shut down ncs agents > > + * - Set > > + * > > + * Global lga_use_count [in/out] = 0 > > + * > > + * @return always NCSCC_RC_SUCCESS > > + */ > > +unsigned int lga_force_shutdown(void) > > +{ > > + unsigned int rc = NCSCC_RC_SUCCESS; > > + TRACE_ENTER(); > > + pthread_mutex_lock(&lga_lock); > > + if (lga_use_count > 0) { > > + lga_destroy(); > > + rc = ncs_agents_shutdown(); /* Always returns > NCSCC_RC_SUCCESS */ > > + lga_use_count = 0; > > + TRACE("%s: Forced shutdown. Handles > invalidated\n",__FUNCTION__); > > + } > > + pthread_mutex_unlock(&lga_lock); > > + TRACE_LEAVE(); > > + return rc; > > +} > > + > > > > > /********************************************************** > ****************** > > * Name : lga_msg_destroy > > * > > @@ -387,6 +441,8 @@ void lga_hdl_list_del(lga_client_hdl_rec > > { > > lga_client_hdl_rec_t *client_hdl; > > > > + TRACE_ENTER(); > > + > > while ((client_hdl = *p_client_hdl) != NULL) { > > *p_client_hdl = client_hdl->next; > > ncshm_destroy_hdl(NCS_SERVICE_ID_LGA, > client_hdl->local_hdl); > > @@ -398,6 +454,7 @@ void lga_hdl_list_del(lga_client_hdl_rec > > free(client_hdl); > > client_hdl = 0; > > } > > + TRACE_LEAVE(); > > } > > > > > > > /********************************************************** > ****************** > > @@ -416,6 +473,7 @@ void lga_hdl_list_del(lga_client_hdl_rec > > > > > ********************************************************** > ********************/ > > uint32_t lga_log_stream_hdl_rec_del(lga_log_stream_hdl_rec_t > > **list_head, lga_log_stream_hdl_rec_t *rm_node) > > { > > + TRACE_ENTER(); > > /* Find the channel hdl record in the list of records */ > > lga_log_stream_hdl_rec_t *list_iter = *list_head; > > > > @@ -427,6 +485,7 @@ uint32_t lga_log_stream_hdl_rec_del(lga_ > > ncshm_give_hdl(rm_node->log_stream_hdl); > > ncshm_destroy_hdl(NCS_SERVICE_ID_LGA, > rm_node->log_stream_hdl); > > free(rm_node); > > + TRACE_LEAVE(); > > return NCSCC_RC_SUCCESS; > > } else { /* 
find the rec */ > > > > @@ -438,6 +497,7 @@ uint32_t lga_log_stream_hdl_rec_del(lga_ > > > ncshm_give_hdl(rm_node->log_stream_hdl); > > > ncshm_destroy_hdl(NCS_SERVICE_ID_LGA, rm_node- > >log_stream_hdl); > > free(rm_node); > > + TRACE_LEAVE(); > > return > NCSCC_RC_SUCCESS; > > } > > /* move onto the next one */ > > @@ -493,7 +553,6 @@ uint32_t lga_hdl_rec_del(lga_client_hdl_ > > rc = NCSCC_RC_SUCCESS; > > goto out; > > } else { /* find the rec */ > > - > > while (NULL != list_iter) { > > if (list_iter->next == rm_node) { > > list_iter->next = > rm_node->next; > > @@ -517,10 +576,9 @@ uint32_t lga_hdl_rec_del(lga_client_hdl_ > > list_iter = list_iter->next; > > } > > } > > - TRACE("failed"); > > > > out: > > - TRACE_LEAVE(); > > + TRACE_LEAVE2("rc = %d (2 <=> fail)", rc); > > return rc; > > } > > > > @@ -566,6 +624,15 @@ lga_log_stream_hdl_rec_t *lga_log_stream > > rec->log_stream_name.length = logStreamName->length; > > memcpy((void *)rec->log_stream_name.value, (void > > *)logStreamName->value, logStreamName->length); > > rec->log_header_type = log_header_type; > > + > > + /*** > > + * Initiate the recovery flag > > + * The setting means that the stream is initialized and that > there > > is > > + * no reason to recover. This setting will change if server down > is > > + * detected > > + */ > > + rec->recovered_flag = true; > > + > > /** Initialize the parent handle **/ > > rec->parent_hdl = *hdl_rec; > > > > @@ -616,6 +683,15 @@ lga_client_hdl_rec_t *lga_hdl_rec_add(lg > > **/ > > rec->lgs_client_id = client_id; > > > > + /*** > > + * Initiate the recovery flags > > + * The setting means that the client is initialized and that there > > is > > + * no reason to recover. 
This setting will change if server down > is > > + * detected > > + */ > > + rec->initialized_flag = true; > > + rec->recovered_flag = true; > > + > > /** Initialize and attach the IPC/Priority queue > > **/ > > if (m_NCS_IPC_CREATE(&rec->mbx) != NCSCC_RC_SUCCESS) { ------------------------------------------------------------------------------ Site24x7 APM Insight: Get Deep Visibility into Application Performance APM + Mobile APM + RUM: Monitor 3 App instances at just $35/Month Monitor end-to-end web transactions and take corrective actions now Troubleshoot faster and improve end-user experience. Signup Now! http://pubads.g.doubleclick.net/gampad/clk?id=272487151&iu=/4140 _______________________________________________ Opensaf-devel mailing list [email protected] https://lists.sourceforge.net/lists/listinfo/opensaf-devel
