Hi Vu,
Ack for patch 2.
BTW, what is the idea behind returning TRY_AGAIN for streamclose(), i.e. as
below?
+ if (lga_state == LGA_NO_SERVER) {
+ /* We have no server and cannot write. The client may try again
+ */
+ TRACE("%s No server", __FUNCTION__);
+ ais_rc = SA_AIS_ERR_TRY_AGAIN;
+ goto done_give_hdl_stream;
+ }
+
Mathi.
----- [email protected] wrote:
> osaf/libs/agents/saf/lga/Makefile.am | 6 +-
> osaf/libs/agents/saf/lga/lga.h | 53 +-
> osaf/libs/agents/saf/lga/lga_api.c | 858
> ++++++++++++++++++++++++----------
> osaf/libs/agents/saf/lga/lga_mds.c | 46 +-
> osaf/libs/agents/saf/lga/lga_state.c | 670
> +++++++++++++++++++++++++++
> osaf/libs/agents/saf/lga/lga_state.h | 41 +
> osaf/libs/agents/saf/lga/lga_util.c | 94 +++-
> 7 files changed, 1479 insertions(+), 289 deletions(-)
>
>
> The patch makes the LOG service able to handle the case where both SC
> nodes are down at the same time.
> When one or both nodes come up again, the log service must be able to
> resume its work, preferably without any action by the clients.
>
> A log client should not have to be aware of whether one or both SC nodes
> are down.
> The only thing that should happen is that TRY AGAIN (and in some
> cases TIMEOUT) is returned.
> It is the responsibility of the client to decide how to handle this.
>
> diff --git a/osaf/libs/agents/saf/lga/Makefile.am
> b/osaf/libs/agents/saf/lga/Makefile.am
> --- a/osaf/libs/agents/saf/lga/Makefile.am
> +++ b/osaf/libs/agents/saf/lga/Makefile.am
> @@ -20,7 +20,8 @@ include $(top_srcdir)/Makefile.common
> MAINTAINERCLEANFILES = Makefile.in
>
> noinst_HEADERS = \
> - lga.h
> + lga.h \
> + lga_state.h
>
> noinst_LTLIBRARIES = liblga.la
>
> @@ -31,6 +32,7 @@ liblga_la_CPPFLAGS = \
> liblga_la_SOURCES = \
> lga_api.c \
> lga_util.c \
> - lga_mds.c
> + lga_mds.c \
> + lga_state.c
>
> liblga_la_LDFLAGS = -static
> diff --git a/osaf/libs/agents/saf/lga/lga.h
> b/osaf/libs/agents/saf/lga/lga.h
> --- a/osaf/libs/agents/saf/lga/lga.h
> +++ b/osaf/libs/agents/saf/lga/lga.h
> @@ -29,6 +29,7 @@
> #include <ncsencdec_pub.h>
> #include <ncs_util.h>
> #include <logtrace.h>
> +#include <osaf_time.h>
>
> #include "lgsv_msg.h"
> #include "lgsv_defs.h"
> @@ -49,6 +50,11 @@ typedef struct lga_log_stream_hdl_rec {
> unsigned int lgs_log_stream_id; /* server reference for this log
> stream */
> struct lga_log_stream_hdl_rec *next; /* next pointer for the list in
> lga_client_hdl_rec_t */
> struct lga_client_hdl_rec *parent_hdl; /* Back Pointer to the client
> instantiation */
> + /* This flag is used with recovery handling. It is valid only
> + * after server down has happened (will be initiated when server
> down
> + * event occurs). It's not valid in LGA_NORMAL state.
> + */
> + bool recovered_flag;
> } lga_log_stream_hdl_rec_t;
>
> /* LGA client record */
> @@ -59,8 +65,46 @@ typedef struct lga_client_hdl_rec {
> lga_log_stream_hdl_rec_t *stream_list; /* List of open streams per
> client */
> SYSF_MBX mbx; /* priority q mbx b/w MDS & Library */
> struct lga_client_hdl_rec *next; /* next pointer for the list in
> lga_cb_t */
> + /* These flags are used with recovery handling. They are valid only
> + * after server down has happened (will be initiated when server
> down
> + * event occurs). They are not valid in LGA_NORMAL state
> + */
> + bool initialized_flag; /* Used with "headless" recovery
> handling
> + * Set when client is initialized. Streams
> + * may not have been recovered
> + */
> + bool recovered_flag; /* Used with "headless" recovery
> handling
> + * Set when client is initialized and all
> + * streams are recovered
> + */
> } lga_client_hdl_rec_t;
>
> +/* States of the server */
> +typedef enum {
> + LGS_START, /* The state before agent is started
> + */
> + LGS_DOWN, /* Server is down (headless)
> + */
> + LGS_NO_ACTIVE, /* No active server (switch/fail - over)
> + */
> + LGS_UP /* Server is up
> + */
> +} lgs_state_t;
> +
> +/* Agent internal states */
> +typedef enum {
> + LGA_NORMAL, /* Server is up and no recovery is ongoing
> + */
> + LGA_NO_SERVER, /* No Server (Server down) state
> + */
> + LGA_RECOVERY1, /* Server is up. Recover clients and streams when
> + * request from client.
> + * Recovery1 timer is running
> + */
> + LGA_RECOVERY2 /* Auto recover remaining clients and streams
> + * After recovery1 timeout
> + */
> +} lga_state_t;
> /*
> * The LGA control block is the master anchor structure for all LGA
> * instantiations within a process.
> @@ -70,8 +114,8 @@ typedef struct {
> lga_client_hdl_rec_t *client_list; /* LGA client handle database */
> MDS_HDL mds_hdl; /* MDS handle */
> MDS_DEST lgs_mds_dest; /* LGS absolute/virtual address */
> - int lgs_up; /* Indicate that MDS subscription
> - * is complete */
> + lgs_state_t lgs_state; /* Indicate current server MDS state */
> + lga_state_t lga_state; /* Indicate current state of the agent */
> /* LGS LGA sync params */
> int lgs_sync_awaited;
> NCS_SEL_OBJ lgs_sync_sel;
> @@ -88,8 +132,9 @@ extern uint32_t lga_mds_msg_async_send(l
> extern void lgsv_lga_evt_free(struct lgsv_msg *);
>
> /* lga_init.c */
> -extern unsigned int lga_startup(void);
> -extern unsigned int lga_shutdown(void);
> +unsigned int lga_startup(lga_cb_t *cb);
> +extern unsigned int lga_shutdown_after_last_client(void);
> +extern unsigned int lga_force_shutdown(void);
>
> /* lga_hdl.c */
> extern SaAisErrorT lga_hdl_cbk_dispatch(lga_cb_t *,
> lga_client_hdl_rec_t *, SaDispatchFlagsT);
> diff --git a/osaf/libs/agents/saf/lga/lga_api.c
> b/osaf/libs/agents/saf/lga/lga_api.c
> --- a/osaf/libs/agents/saf/lga/lga_api.c
> +++ b/osaf/libs/agents/saf/lga/lga_api.c
> @@ -16,8 +16,11 @@
> */
>
> #include <string.h>
> +#include <saf_error.h>
> #include "lga.h"
>
> +#include "lga_state.h"
> +
> #define NCS_SAF_MIN_ACCEPT_TIME 10
>
> /* Macro to validate the dispatch flags */
> @@ -38,6 +41,8 @@
> /* The main controle block */
> lga_cb_t lga_cb = {
> .cb_lock = PTHREAD_MUTEX_INITIALIZER,
> + .lga_state = LGA_NORMAL,
> + .lgs_state = LGS_START
> };
>
> static void populate_open_params(lgsv_stream_open_req_t *open_param,
> @@ -116,20 +121,16 @@ SaAisErrorT saLogInitialize(SaLogHandleT
> {
> lga_client_hdl_rec_t *lga_hdl_rec;
> lgsv_msg_t i_msg, *o_msg;
> - SaAisErrorT rc;
> - uint32_t client_id;
> + SaAisErrorT ais_rc = SA_AIS_OK;
> + int rc;
> + uint32_t client_id = 0;
>
> TRACE_ENTER();
>
> + /* Verify parameters (log handle and that version is given) */
> if ((logHandle == NULL) || (version == NULL)) {
> TRACE("version or handle FAILED");
> - rc = SA_AIS_ERR_INVALID_PARAM;
> - goto done;
> - }
> -
> - if ((rc = lga_startup()) != NCSCC_RC_SUCCESS) {
> - TRACE("lga_startup FAILED: %u", rc);
> - rc = SA_AIS_ERR_LIBRARY;
> + ais_rc = SA_AIS_ERR_INVALID_PARAM;
> goto done;
> }
>
> @@ -144,15 +145,59 @@ SaAisErrorT saLogInitialize(SaLogHandleT
> version->releaseCode = LOG_RELEASE_CODE;
> version->majorVersion = LOG_MAJOR_VERSION;
> version->minorVersion = LOG_MINOR_VERSION;
> - lga_shutdown();
> - rc = SA_AIS_ERR_VERSION;
> + lga_shutdown_after_last_client();
> + ais_rc = SA_AIS_ERR_VERSION;
> goto done;
> }
>
> - if (!lga_cb.lgs_up) {
> - lga_shutdown();
> - TRACE("LGS server is down");
> - rc = SA_AIS_ERR_TRY_AGAIN;
> + /***
> + * Handle states
> + * Synchronize with mds and recovery thread (mutex)
> + * NOTE: Nothing to handle if recovery state 1
> + */
> + pthread_mutex_lock(&lga_cb.cb_lock);
> + lgs_state_t lgs_state = lga_cb.lgs_state;
> + lga_state_t lga_state = lga_cb.lga_state;
> + pthread_mutex_unlock(&lga_cb.cb_lock);
> +
> + if (lgs_state == LGS_NO_ACTIVE) {
> + /* We have a server but it is temporary unavailable. Client may
> + * try again
> + */
> + TRACE("%s LGS no active", __FUNCTION__);
> + ais_rc = SA_AIS_ERR_TRY_AGAIN;
> + goto done;
> + }
> +
> + if (lga_state == LGA_NO_SERVER) {
> + /* We have no server and cannot initialize.
> + * The client may try again
> + */
> + TRACE("%s No server", __FUNCTION__);
> + ais_rc = SA_AIS_ERR_TRY_AGAIN;
> + goto done;
> + }
> +
> + if (lga_state == LGA_RECOVERY2) {
> + /* Auto recovery is ongoing. We have to wait for it to finish.
> + * The client may try again
> + */
> + TRACE("%s LGA auto recovery ongoing (2)", __FUNCTION__);
> + ais_rc = SA_AIS_ERR_TRY_AGAIN;
> + goto done;
> + }
> +
> + /***
> + * Do Initiate. It's ok to initiate in recovery state 1 since we
> have a
> + * server and is not conflicting with auto recovery
> + */
> +
> + /* Initiate the client in the agent and if first client also start
> MDS
> + * etc.
> + */
> + if ((rc = lga_startup(&lga_cb)) != NCSCC_RC_SUCCESS) {
> + TRACE("lga_startup FAILED: %u", rc);
> + ais_rc = SA_AIS_ERR_LIBRARY;
> goto done;
> }
>
> @@ -165,18 +210,18 @@ SaAisErrorT saLogInitialize(SaLogHandleT
> /* Send a message to LGS to obtain a client_id/server ref id which
> is cluster
> * wide unique.
> */
> - rc = lga_mds_msg_sync_send(&lga_cb, &i_msg, &o_msg,
> LGS_WAIT_TIME,MDS_SEND_PRIORITY_HIGH);
> + rc = lga_mds_msg_sync_send(&lga_cb, &i_msg, &o_msg, LGS_WAIT_TIME,
> MDS_SEND_PRIORITY_HIGH);
> if (rc != NCSCC_RC_SUCCESS) {
> - lga_shutdown();
> - rc = SA_AIS_ERR_TRY_AGAIN;
> + lga_shutdown_after_last_client();
> + ais_rc = SA_AIS_ERR_TRY_AGAIN;
> goto done;
> }
>
> /** Make sure the LGS return status was SA_AIS_OK
> **/
> - if (SA_AIS_OK != o_msg->info.api_resp_info.rc) {
> - rc = o_msg->info.api_resp_info.rc;
> - TRACE("LGS return FAILED");
> + ais_rc = o_msg->info.api_resp_info.rc;
> + if (SA_AIS_OK != ais_rc) {
> + TRACE("%s LGS error response %s", __FUNCTION__,
> saf_error(ais_rc));
> goto err;
> }
>
> @@ -189,27 +234,26 @@ SaAisErrorT saLogInitialize(SaLogHandleT
> /* create the hdl record & store the callbacks */
> lga_hdl_rec = lga_hdl_rec_add(&lga_cb, callbacks, client_id);
> if (lga_hdl_rec == NULL) {
> - rc = SA_AIS_ERR_NO_MEMORY;
> + ais_rc = SA_AIS_ERR_NO_MEMORY;
> goto err;
> }
>
> /* pass the handle value to the appl */
> - if (SA_AIS_OK == rc)
> - *logHandle = lga_hdl_rec->local_hdl;
> + *logHandle = lga_hdl_rec->local_hdl;
>
> err:
> /* free up the response message */
> if (o_msg)
> lga_msg_destroy(o_msg);
>
> - if (rc != SA_AIS_OK) {
> - TRACE_2("LGA INIT FAILED\n");
> - lga_shutdown();
> + if (ais_rc != SA_AIS_OK) {
> + TRACE_2("%s LGA INIT FAILED\n", __FUNCTION__);
> + lga_shutdown_after_last_client();
> }
>
> done:
> - TRACE_LEAVE();
> - return rc;
> + TRACE_LEAVE2("client_id = %d", client_id);
> + return ais_rc;
> }
>
>
> /***************************************************************************
> @@ -325,6 +369,61 @@ SaAisErrorT saLogDispatch(SaLogHandleT l
> return rc;
> }
>
> +/******************************************************************************
> + * Finalize
> + * API function and help functions
> +
> ******************************************************************************/
> +
> +/**
> + * Create and send a Finalize message to the server
> + *
> + * @param hdl_rec
> + * @return AIS return code
> + */
> +static SaAisErrorT send_Finalize_msg(lga_client_hdl_rec_t *hdl_rec)
> +{
> + uint32_t mds_rc;
> + lgsv_msg_t msg, *o_msg = NULL;
> + SaAisErrorT ais_rc = SA_AIS_OK;
> +
> + TRACE_ENTER();
> +
> + memset(&msg, 0, sizeof(lgsv_msg_t));
> + msg.type = LGSV_LGA_API_MSG;
> + msg.info.api_info.type = LGSV_FINALIZE_REQ;
> + msg.info.api_info.param.finalize.client_id =
> hdl_rec->lgs_client_id;
> +
> + mds_rc = lga_mds_msg_sync_send(
> + &lga_cb, &msg,
> + &o_msg,
> + LGS_WAIT_TIME,
> + MDS_SEND_PRIORITY_MEDIUM
> + );
> + switch (mds_rc) {
> + case NCSCC_RC_SUCCESS:
> + break;
> + case NCSCC_RC_REQ_TIMOUT:
> + ais_rc = SA_AIS_ERR_TIMEOUT;
> + TRACE("lga_mds_msg_sync_send FAILED: %s", saf_error(ais_rc));
> + goto done;
> + default:
> + TRACE("lga_mds_msg_sync_send FAILED: %s", saf_error(ais_rc));
> + ais_rc = SA_AIS_ERR_NO_RESOURCES;
> + goto done;
> + }
> +
> + if (o_msg != NULL) {
> + ais_rc = o_msg->info.api_resp_info.rc;
> + lga_msg_destroy(o_msg);
> + } else
> + ais_rc = SA_AIS_ERR_NO_RESOURCES;
> +
> + done:
> +
> + TRACE_LEAVE();
> + return ais_rc;
> +}
> +
>
> /***************************************************************************
> * 8.4.4
> *
> @@ -350,84 +449,119 @@ SaAisErrorT saLogDispatch(SaLogHandleT l
> SaAisErrorT saLogFinalize(SaLogHandleT logHandle)
> {
> lga_client_hdl_rec_t *hdl_rec;
> - lgsv_msg_t msg, *o_msg = NULL;
> - SaAisErrorT rc = SA_AIS_OK;
> - uint32_t mds_rc;
> + SaAisErrorT ais_rc = SA_AIS_OK;
> + uint32_t rc;
>
> TRACE_ENTER();
>
> /* retrieve hdl rec */
> hdl_rec = ncshm_take_hdl(NCS_SERVICE_ID_LGA, logHandle);
> if (hdl_rec == NULL) {
> - TRACE("ncshm_take_hdl failed");
> - rc = SA_AIS_ERR_BAD_HANDLE;
> + TRACE("%s ncshm_take_hdl failed", __FUNCTION__);
> + ais_rc = SA_AIS_ERR_BAD_HANDLE;
> goto done;
> }
>
> - /* Check Whether LGS is up or not */
> - if (!lga_cb.lgs_up) {
> - TRACE("LGS down");
> - rc = SA_AIS_ERR_TRY_AGAIN;
> + /***
> + * Handle states
> + * Synchronize with mds and recovery thread (mutex)
> + */
> + pthread_mutex_lock(&lga_cb.cb_lock);
> + lgs_state_t lgs_state = lga_cb.lgs_state;
> + lga_state_t lga_state = lga_cb.lga_state;
> + pthread_mutex_unlock(&lga_cb.cb_lock);
> +
> + if (lgs_state == LGS_NO_ACTIVE) {
> + /* We have a server but it is temporary unavailable. Client may
> + * try again
> + */
> + TRACE("%s lgs_state = LGS no active", __FUNCTION__);
> + ais_rc = SA_AIS_ERR_TRY_AGAIN;
> goto done_give_hdl;
> }
>
> - /** populate & send the finalize message
> - ** and make sure the finalize from the server
> - ** end returned before deleting the local records.
> - **/
> - memset(&msg, 0, sizeof(lgsv_msg_t));
> - msg.type = LGSV_LGA_API_MSG;
> - msg.info.api_info.type = LGSV_FINALIZE_REQ;
> - msg.info.api_info.param.finalize.client_id =
> hdl_rec->lgs_client_id;
> -
> - mds_rc = lga_mds_msg_sync_send(&lga_cb, &msg, &o_msg,
> LGS_WAIT_TIME,MDS_SEND_PRIORITY_MEDIUM);
> - switch (mds_rc) {
> - case NCSCC_RC_SUCCESS:
> - break;
> - case NCSCC_RC_REQ_TIMOUT:
> - rc = SA_AIS_ERR_TIMEOUT;
> - TRACE("lga_mds_msg_sync_send FAILED: %u", rc);
> - goto done_give_hdl;
> - default:
> - TRACE("lga_mds_msg_sync_send FAILED: %u", rc);
> - rc = SA_AIS_ERR_NO_RESOURCES;
> + if (lga_state == LGA_NO_SERVER) {
> + /* We have no server but can still finalize client.
> + * In this situation no message to server is sent
> + */
> + TRACE("%s lga_state = LGS down", __FUNCTION__);
> + ais_rc = SA_AIS_OK;
> goto done_give_hdl;
> }
>
> - if (o_msg != NULL) {
> - rc = o_msg->info.api_resp_info.rc;
> - lga_msg_destroy(o_msg);
> - } else
> - rc = SA_AIS_ERR_NO_RESOURCES;
> + if (lga_state == LGA_RECOVERY2) {
> + /* Auto recovery is ongoing. We have to wait for it to finish.
> + * The client may try again
> + */
> + TRACE("%s lga_state = LGA auto recovery ongoing (2)",
> __FUNCTION__);
> + ais_rc = SA_AIS_ERR_TRY_AGAIN;
> + goto done_give_hdl;
> + }
>
> - if (rc == SA_AIS_OK) {
> - rc = lga_hdl_rec_del(&lga_cb.client_list, hdl_rec);
> - if (rc != NCSCC_RC_SUCCESS)
> - rc = SA_AIS_ERR_BAD_HANDLE;
> + if (lga_state == LGA_RECOVERY1) {
> + /* We are in recovery state 1. Client may or may not have been
> + * initialized. If initialized a finalize request must be sent
> + * to the server else the client is finalized in the agent only
> + */
> + TRACE("%s lga_state = Recovery state (1)", __FUNCTION__);
> + if (hdl_rec->initialized_flag == false) {
> + TRACE("\t Client is not initialized");
> + goto done_give_hdl;
> + }
> + TRACE("\t Client is initialized");
> + }
> +
> + /***
> + * Populate & send the finalize message
> + * and make sure the finalize from the server
> + * end returned before deleting the local records.
> + */
> + ais_rc = send_Finalize_msg(hdl_rec);
> +
> + if (ais_rc == SA_AIS_OK) {
> + TRACE("%s delete_one_client", __FUNCTION__);
> + (void) delete_one_client(&lga_cb.client_list, hdl_rec);
> }
>
> done_give_hdl:
> ncshm_give_hdl(logHandle);
>
> - if (rc == SA_AIS_OK) {
> - rc = lga_shutdown();
> + if (ais_rc == SA_AIS_OK) {
> + rc = lga_shutdown_after_last_client();
> if (rc != NCSCC_RC_SUCCESS)
> TRACE("lga_shutdown ");
> }
>
> done:
> - TRACE_LEAVE2("rc = %u", rc);
> - return rc;
> + TRACE_LEAVE2("ais_rc = %s", saf_error(ais_rc));
> + return ais_rc;
> }
>
> -static SaAisErrorT validate_open_params(SaLogHandleT logHandle,
> - const SaNameT *logStreamName,
> - const SaLogFileCreateAttributesT_2
> *logFileCreateAttributes,
> - SaLogStreamOpenFlagsT
> logStreamOpenFlags,
> - SaTimeT timeOut, SaLogStreamHandleT
> *logStreamHandle, uint32_t
> *header_type)
> +/******************************************************************************
> + * Open a stream
> + * API function and help functions
> +
> ******************************************************************************/
> +
> +/**
> + * Check input parameters for opening a stream
> + *
> + * @param logStreamName
> + * @param logFileCreateAttributes
> + * @param logStreamOpenFlags
> + * @param logStreamHandle
> + * @param header_type
> + * @return
> + */
> +static SaAisErrorT validate_open_params(
> + const SaNameT *logStreamName,
> + const SaLogFileCreateAttributesT_2 *logFileCreateAttributes,
> + SaLogStreamOpenFlagsT logStreamOpenFlags,
> + SaLogStreamHandleT *logStreamHandle,
> + uint32_t *header_type
> + )
> {
> size_t len;
> - SaAisErrorT rc = SA_AIS_OK;
> + SaAisErrorT ais_rc = SA_AIS_OK;
>
> TRACE_ENTER();
>
> @@ -435,7 +569,7 @@ static SaAisErrorT validate_open_params(
>
> if ((NULL == logStreamName) || (NULL == logStreamHandle)) {
> TRACE("SA_AIS_ERR_INVALID_PARAM => NULL pointer check");
> - rc = SA_AIS_ERR_INVALID_PARAM;
> + ais_rc = SA_AIS_ERR_INVALID_PARAM;
> goto done;
> }
>
> @@ -450,7 +584,7 @@ static SaAisErrorT validate_open_params(
> */
> if (logStreamName->length >= SA_MAX_NAME_LENGTH) {
> TRACE("SA_AIS_ERR_INVALID_PARAM, logStreamName->length >
> SA_MAX_NAME_LENGTH");
> - rc = SA_AIS_ERR_INVALID_PARAM;
> + ais_rc = SA_AIS_ERR_INVALID_PARAM;
> goto done;
> }
>
> @@ -575,10 +709,11 @@ static SaAisErrorT validate_open_params(
>
> done:
> TRACE_LEAVE();
> - return rc;
> + return ais_rc;
> }
>
> /**
> + * API function for opening a stream
> *
> * @param logHandle
> * @param logStreamName
> @@ -599,29 +734,85 @@ SaAisErrorT saLogStreamOpen_2(SaLogHandl
> lga_client_hdl_rec_t *hdl_rec;
> lgsv_msg_t msg, *o_msg = NULL;
> lgsv_stream_open_req_t *open_param;
> - SaAisErrorT rc;
> + SaAisErrorT ais_rc;
> + int rc = 0;
> + uint32_t ncs_rc;
> uint32_t timeout;
> uint32_t log_stream_id;
> uint32_t log_header_type = 0;
>
> TRACE_ENTER();
>
> - rc = validate_open_params(logHandle, logStreamName,
> logFileCreateAttributes,
> - logStreamOpenFlags, timeOut, logStreamHandle,
> &log_header_type);
> - if (rc != SA_AIS_OK)
> + ais_rc = validate_open_params(logStreamName,
> logFileCreateAttributes,
> + logStreamOpenFlags, logStreamHandle,
> &log_header_type);
> + if (ais_rc != SA_AIS_OK)
> goto done;
>
> /* retrieve log service hdl rec */
> hdl_rec = ncshm_take_hdl(NCS_SERVICE_ID_LGA, logHandle);
> if (hdl_rec == NULL) {
> TRACE("ncshm_take_hdl failed");
> - rc = SA_AIS_ERR_BAD_HANDLE;
> + ais_rc = SA_AIS_ERR_BAD_HANDLE;
> goto done;
> }
>
> + /***
> + * Handle states
> + * Synchronize with mds and recovery thread (mutex)
> + */
> + pthread_mutex_lock(&lga_cb.cb_lock);
> + lgs_state_t lgs_state = lga_cb.lgs_state;
> + lga_state_t lga_state = lga_cb.lga_state;
> + pthread_mutex_unlock(&lga_cb.cb_lock);
> +
> + if (lgs_state == LGS_NO_ACTIVE) {
> + /* We have a server but it is temporary unavailable. Client may
> + * try again
> + */
> + TRACE("%s LGS no active", __FUNCTION__);
> + ais_rc = SA_AIS_ERR_TRY_AGAIN;
> + goto done_give_hdl;
> + }
> +
> + if (lga_state == LGA_NO_SERVER) {
> + /* We have no server and cannot open a stream.
> + * The client may try again
> + */
> + TRACE("%s No server", __FUNCTION__);
> + ais_rc = SA_AIS_ERR_TRY_AGAIN;
> + goto done_give_hdl;
> + }
> +
> + if (lga_state == LGA_RECOVERY2) {
> + /* Auto recovery is ongoing. We have to wait for it to finish.
> + * The client may try again
> + */
> + TRACE("%s LGA auto recovery ongoing (2)", __FUNCTION__);
> + ais_rc = SA_AIS_ERR_TRY_AGAIN;
> + goto done_give_hdl;
> + }
> +
> + if (lga_state == LGA_RECOVERY1) {
> + /* We are in recovery 1 state.
> + * Recover client and execute the request
> + */
> + rc = recover_one_client(hdl_rec);
> + if (rc == -1) {
> + /* Client could not be recovered. Delete client and
> + * return BAD HANDLE
> + */
> + TRACE("%s delete_one_client", __FUNCTION__);
> + (void) delete_one_client(&lga_cb.client_list, hdl_rec);
> + ais_rc = SA_AIS_ERR_BAD_HANDLE;
> + /* Handles are destroyed so we shall not give handles */
> + goto done;
> + }
> + }
> +
> +
> /** Populate a sync MDS message to obtain a log stream id and an
> - ** instance open id.
> - **/
> + * instance open id.
> + */
> memset(&msg, 0, sizeof(lgsv_msg_t));
> msg.type = LGSV_LGA_API_MSG;
> msg.info.api_info.type = LGSV_STREAM_OPEN_REQ;
> @@ -641,7 +832,7 @@ SaAisErrorT saLogStreamOpen_2(SaLogHandl
> /* Construct the logFileName */
> open_param->logFileName = (char *)
> malloc(strlen(logFileCreateAttributes->logFileName) + 1);
> if (open_param->logFileName == NULL) {
> - rc = SA_AIS_ERR_NO_MEMORY;
> + ais_rc = SA_AIS_ERR_NO_MEMORY;
> goto done_give_hdl;
> }
> strcpy(open_param->logFileName,
> logFileCreateAttributes->logFileName);
> @@ -653,7 +844,7 @@ SaAisErrorT saLogStreamOpen_2(SaLogHandl
>
> open_param->logFilePathName = (char *) malloc(len);
> if (open_param->logFilePathName == NULL) {
> - rc = SA_AIS_ERR_NO_MEMORY;
> + ais_rc = SA_AIS_ERR_NO_MEMORY;
> goto done_give_hdl;
> }
>
> @@ -668,31 +859,21 @@ SaAisErrorT saLogStreamOpen_2(SaLogHandl
>
> if (timeout < NCS_SAF_MIN_ACCEPT_TIME) {
> TRACE("Timeout");
> - rc = SA_AIS_ERR_TIMEOUT;
> - goto done_give_hdl;
> - }
> -
> - /* Check whether LGS is up or not */
> - if (!lga_cb.lgs_up) {
> - TRACE("LGS down");
> - rc = SA_AIS_ERR_TRY_AGAIN;
> + ais_rc = SA_AIS_ERR_TIMEOUT;
> goto done_give_hdl;
> }
>
> /* Send a sync MDS message to obtain a log stream id */
> - rc = lga_mds_msg_sync_send(&lga_cb, &msg, &o_msg,
> timeout,MDS_SEND_PRIORITY_HIGH);
> - if (rc != NCSCC_RC_SUCCESS) {
> - if (o_msg)
> - lga_msg_destroy(o_msg);
> - rc = SA_AIS_ERR_TRY_AGAIN;
> + ncs_rc = lga_mds_msg_sync_send(&lga_cb, &msg, &o_msg, timeout,
> MDS_SEND_PRIORITY_HIGH);
> + if (ncs_rc != NCSCC_RC_SUCCESS) {
> + ais_rc = SA_AIS_ERR_TRY_AGAIN;
> goto done_give_hdl;
> }
>
> - if (SA_AIS_OK != o_msg->info.api_resp_info.rc) {
> - rc = o_msg->info.api_resp_info.rc;
> - TRACE("Bad return status!!! rc = %d", rc);
> - if (o_msg)
> - lga_msg_destroy(o_msg);
> + ais_rc = o_msg->info.api_resp_info.rc;
> + if (SA_AIS_OK != ais_rc) {
> + TRACE("Bad return status!!! rc = %d", ais_rc);
> + lga_msg_destroy(o_msg);
> goto done_give_hdl;
> }
>
> @@ -708,25 +889,28 @@ SaAisErrorT saLogStreamOpen_2(SaLogHandl
> /** Allocate an LGA_LOG_STREAM_HDL_REC structure and insert this
> * into the list of channel hdl record.
> **/
> - lstr_hdl_rec = lga_log_stream_hdl_rec_add(&hdl_rec,
> - log_stream_id,
> logStreamOpenFlags, logStreamName,
> log_header_type);
> + lstr_hdl_rec = lga_log_stream_hdl_rec_add(
> + &hdl_rec,
> + log_stream_id, logStreamOpenFlags,
> + logStreamName, log_header_type
> + );
> if (lstr_hdl_rec == NULL) {
> pthread_mutex_unlock(&lga_cb.cb_lock);
> - if (o_msg)
> - lga_msg_destroy(o_msg);
> - rc = SA_AIS_ERR_NO_MEMORY;
> + lga_msg_destroy(o_msg);
> + ais_rc = SA_AIS_ERR_NO_MEMORY;
> goto done_give_hdl;
> }
> +
> /** UnLock LGA_CB
> **/
> pthread_mutex_unlock(&lga_cb.cb_lock);
>
> - /** Give the hdl-mgr allocated hdl to the application.
> - **/
> + /** Give the hdl-mgr allocated hdl to the application and free the
> response
> + * message
> + **/
> *logStreamHandle =
> (SaLogStreamHandleT)lstr_hdl_rec->log_stream_hdl;
>
> - if (o_msg)
> - lga_msg_destroy(o_msg);
> + lga_msg_destroy(o_msg);
>
> done_give_hdl:
> ncshm_give_hdl(logHandle);
> @@ -735,7 +919,7 @@ SaAisErrorT saLogStreamOpen_2(SaLogHandl
>
> done:
> TRACE_LEAVE();
> - return rc;
> + return ais_rc;
> }
>
> /**
> @@ -758,6 +942,136 @@ SaAisErrorT saLogStreamOpenAsync_2(SaLog
> return SA_AIS_ERR_NOT_SUPPORTED;
> }
>
> +/******************************************************************************
> + * Write Log record
> + * API function and help functions
> +
> ******************************************************************************/
> +
> +/**
> + * Validate the log record and if generic header add
> + * logSvcUsrName from environment variable SA_AMF_COMPONENT_NAME
> + *
> + * @param logRecord[in]
> + * @param logSvcUsrName[out]
> + * @param write_param[out]
> + * @return AIS return code
> + */
> +static SaAisErrorT handle_log_record(const SaLogRecordT *logRecord,
> + SaNameT *logSvcUsrName,
> + lgsv_write_log_async_req_t *write_param)
> +{
> + SaAisErrorT ais_rc = SA_AIS_OK;
> + SaTimeT logTimeStamp;
> +
> + TRACE_ENTER();
> +
> + if (NULL == logRecord) {
> + TRACE("SA_AIS_ERR_INVALID_PARAM => NULL pointer check");
> + ais_rc = SA_AIS_ERR_INVALID_PARAM;
> + goto done;
> + }
> +
> + if (logRecord->logHdrType == SA_LOG_GENERIC_HEADER) {
> + switch (logRecord->logHeader.genericHdr.logSeverity) {
> + case SA_LOG_SEV_EMERGENCY:
> + case SA_LOG_SEV_ALERT:
> + case SA_LOG_SEV_CRITICAL:
> + case SA_LOG_SEV_ERROR:
> + case SA_LOG_SEV_WARNING:
> + case SA_LOG_SEV_NOTICE:
> + case SA_LOG_SEV_INFO:
> + break;
> + default:
> + TRACE("Invalid severity: %x",
> + logRecord->logHeader.genericHdr.logSeverity);
> + ais_rc = SA_AIS_ERR_INVALID_PARAM;
> + goto done;
> + }
> + }
> +
> + if (logRecord->logBuffer != NULL) {
> + if ((logRecord->logBuffer->logBuf == NULL) &&
> + (logRecord->logBuffer->logBufSize != 0)) {
> + TRACE("logBuf == NULL && logBufSize != 0");
> + ais_rc = SA_AIS_ERR_INVALID_PARAM;
> + goto done;
> + }
> + }
> +
> + /* Set timeStamp data if not provided by application user */
> + if (logRecord->logTimeStamp == SA_TIME_UNKNOWN) {
> + logTimeStamp = setLogTime();
> + write_param->logTimeStamp = &logTimeStamp;
> + } else {
> + write_param->logTimeStamp = (SaTimeT *)&logRecord->logTimeStamp;
> + }
> +
> + /* SA_AIS_ERR_INVALID_PARAM, bullet 2 in SAI-AIS-LOG-A.02.01
> + Section 3.6.3, Return Values */
> + if (logRecord->logHdrType == SA_LOG_GENERIC_HEADER) {
> + if (logRecord->logHeader.genericHdr.logSvcUsrName == NULL) {
> + char *logSvcUsrChars = NULL;
> + TRACE("logSvcUsrName == NULL");
> + logSvcUsrChars = getenv("SA_AMF_COMPONENT_NAME");
> + if (logSvcUsrChars == NULL) {
> + ais_rc = SA_AIS_ERR_INVALID_PARAM;
> + goto done;
> + }
> + logSvcUsrName->length = strlen(logSvcUsrChars);
> + if (logSvcUsrName->length >= SA_MAX_NAME_LENGTH) {
> + TRACE("SA_AMF_COMPONENT_NAME is too long");
> + ais_rc = SA_AIS_ERR_INVALID_PARAM;
> + goto done;
> + }
> + strcpy((char *)logSvcUsrName->value, logSvcUsrChars);
> + write_param->logSvcUsrName = logSvcUsrName;
> + } else {
> + if
> (logRecord->logHeader.genericHdr.logSvcUsrName->length >=
> + SA_MAX_NAME_LENGTH) {
> + TRACE("logSvcUsrName too long");
> + ais_rc = SA_AIS_ERR_INVALID_PARAM;
> + goto done;
> + }
> + logSvcUsrName->length =
> +
> logRecord->logHeader.genericHdr.logSvcUsrName->length;
> + write_param->logSvcUsrName =
> + (SaNameT
> *)logRecord->logHeader.genericHdr.logSvcUsrName;
> + }
> + }
> +
> + if (logRecord->logHdrType == SA_LOG_NTF_HEADER) {
> + if (logRecord->logHeader.ntfHdr.notificationObject == NULL) {
> + TRACE("notificationObject == NULL");
> + ais_rc = SA_AIS_ERR_INVALID_PARAM;
> + goto done;
> + }
> +
> + if (logRecord->logHeader.ntfHdr.notificationObject->length >=
> + SA_MAX_NAME_LENGTH) {
> + TRACE("notificationObject.length >=
> SA_MAX_NAME_LENGTH");
> + ais_rc = SA_AIS_ERR_INVALID_PARAM;
> + goto done;
> + }
> +
> + if (logRecord->logHeader.ntfHdr.notifyingObject == NULL) {
> + TRACE("notifyingObject == NULL");
> + ais_rc = SA_AIS_ERR_INVALID_PARAM;
> + goto done;
> + }
> +
> + if (logRecord->logHeader.ntfHdr.notifyingObject->length >=
> + SA_MAX_NAME_LENGTH) {
> + TRACE("notifyingObject.length >= SA_MAX_NAME_LENGTH");
> + ais_rc = SA_AIS_ERR_INVALID_PARAM;
> + goto done;
> + }
> + }
> +
> +done:
> + TRACE_LEAVE();
> + return ais_rc;
> +}
> +
> /**
> *
> * @param logStreamHandle
> @@ -788,158 +1102,125 @@ SaAisErrorT saLogWriteLogAsync(SaLogStre
> lga_log_stream_hdl_rec_t *lstr_hdl_rec;
> lga_client_hdl_rec_t *hdl_rec;
> lgsv_msg_t msg;
> - SaAisErrorT rc = SA_AIS_OK;
> + SaAisErrorT ais_rc = SA_AIS_OK;
> + lgsv_write_log_async_req_t *write_param;
> SaNameT logSvcUsrName;
> - SaTimeT logTimeStamp;
> - lgsv_write_log_async_req_t *write_param;
> + int rc;
>
> memset(&(msg), 0, sizeof(lgsv_msg_t));
> write_param = &msg.info.api_info.param.write_log_async;
> TRACE_ENTER();
>
> - if (NULL == logRecord) {
> - TRACE("SA_AIS_ERR_INVALID_PARAM => NULL pointer check");
> - rc = SA_AIS_ERR_INVALID_PARAM;
> + if ((ackFlags != 0) && (ackFlags != SA_LOG_RECORD_WRITE_ACK)) {
> + TRACE("SA_AIS_ERR_BAD_FLAGS=> ackFlags");
> + ais_rc = SA_AIS_ERR_BAD_FLAGS;
> goto done;
> }
>
> - if (logRecord->logHdrType == SA_LOG_GENERIC_HEADER) {
> - switch (logRecord->logHeader.genericHdr.logSeverity) {
> - case SA_LOG_SEV_EMERGENCY:
> - case SA_LOG_SEV_ALERT:
> - case SA_LOG_SEV_CRITICAL:
> - case SA_LOG_SEV_ERROR:
> - case SA_LOG_SEV_WARNING:
> - case SA_LOG_SEV_NOTICE:
> - case SA_LOG_SEV_INFO:
> - break;
> - default:
> - TRACE("Invalid severity: %x",
> logRecord->logHeader.genericHdr.logSeverity);
> - rc = SA_AIS_ERR_INVALID_PARAM;
> - goto done;
> - }
> - }
> -
> - if (logRecord->logBuffer != NULL) {
> - if ((logRecord->logBuffer->logBuf == NULL) &&
> (logRecord->logBuffer->logBufSize != 0)) {
> - TRACE("logBuf == NULL && logBufSize != 0");
> - rc = SA_AIS_ERR_INVALID_PARAM;
> - goto done;
> - }
> - }
> -
> - if ((ackFlags != 0) && (ackFlags != SA_LOG_RECORD_WRITE_ACK)) {
> - TRACE("SA_AIS_ERR_BAD_FLAGS=> ackFlags");
> - rc = SA_AIS_ERR_BAD_FLAGS;
> + /* Validate the log record and if generic header add
> + * logSvcUsrName from environment variable SA_AMF_COMPONENT_NAME
> + */
> + ais_rc = handle_log_record(logRecord, &logSvcUsrName, write_param);
> + if (ais_rc != SA_AIS_OK) {
> + TRACE("%s: Validate Log record Fail", __FUNCTION__);
> goto done;
> }
>
> - /* Set timeStamp data if not provided by application user */
> - if (logRecord->logTimeStamp == SA_TIME_UNKNOWN) {
> - logTimeStamp = setLogTime();
> - write_param->logTimeStamp = &logTimeStamp;
> - } else {
> - write_param->logTimeStamp = (SaTimeT *)&logRecord->logTimeStamp;
> - }
> -
> - /* SA_AIS_ERR_INVALID_PARAM, bullet 2 in SAI-AIS-LOG-A.02.01
> - Section 3.6.3, Return Values */
> - if (logRecord->logHdrType == SA_LOG_GENERIC_HEADER) {
> - if (logRecord->logHeader.genericHdr.logSvcUsrName == NULL) {
> - char *logSvcUsrChars = NULL;
> - TRACE("logSvcUsrName == NULL");
> - logSvcUsrChars = getenv("SA_AMF_COMPONENT_NAME");
> - if (logSvcUsrChars == NULL) {
> - rc = SA_AIS_ERR_INVALID_PARAM;
> - goto done;
> - }
> - logSvcUsrName.length = strlen(logSvcUsrChars);
> - if (logSvcUsrName.length >= SA_MAX_NAME_LENGTH) {
> - TRACE("SA_AMF_COMPONENT_NAME is too long");
> - rc = SA_AIS_ERR_INVALID_PARAM;
> - goto done;
> - }
> - strcpy((char *)logSvcUsrName.value, logSvcUsrChars);
> - write_param->logSvcUsrName = &logSvcUsrName;
> - } else {
> - if
> (logRecord->logHeader.genericHdr.logSvcUsrName->length >=
> SA_MAX_NAME_LENGTH) {
> - TRACE("logSvcUsrName too long");
> - rc = SA_AIS_ERR_INVALID_PARAM;
> - goto done;
> - }
> - logSvcUsrName.length =
> logRecord->logHeader.genericHdr.logSvcUsrName->length;
> - write_param->logSvcUsrName = (SaNameT
> *)logRecord->logHeader.genericHdr.logSvcUsrName;
> - }
> - }
> -
> - if (logRecord->logHdrType == SA_LOG_NTF_HEADER) {
> - if (logRecord->logHeader.ntfHdr.notificationObject == NULL) {
> - TRACE("notificationObject == NULL");
> - rc = SA_AIS_ERR_INVALID_PARAM;
> - goto done;
> - }
> -
> - if (logRecord->logHeader.ntfHdr.notificationObject->length >=
> SA_MAX_NAME_LENGTH) {
> - TRACE("notificationObject.length >=
> SA_MAX_NAME_LENGTH");
> - rc = SA_AIS_ERR_INVALID_PARAM;
> - goto done;
> - }
> -
> - if (logRecord->logHeader.ntfHdr.notifyingObject == NULL) {
> - TRACE("notifyingObject == NULL");
> - rc = SA_AIS_ERR_INVALID_PARAM;
> - goto done;
> - }
> -
> - if (logRecord->logHeader.ntfHdr.notifyingObject->length >=
> SA_MAX_NAME_LENGTH) {
> - TRACE("notifyingObject.length >= SA_MAX_NAME_LENGTH");
> - rc = SA_AIS_ERR_INVALID_PARAM;
> - goto done;
> - }
> - }
> -
> - /* retrieve log stream hdl record */
> + /* Retrieve log stream hdl record
> + * From now on we must give stream before return
> + */
> lstr_hdl_rec = ncshm_take_hdl(NCS_SERVICE_ID_LGA, logStreamHandle);
> if (lstr_hdl_rec == NULL) {
> - TRACE("ncshm_take_hdl logStreamHandle FAILED!");
> - rc = SA_AIS_ERR_BAD_HANDLE;
> + TRACE("%s: ncshm_take_hdl logStreamHandle FAILED!",
> + __FUNCTION__);
> + ais_rc = SA_AIS_ERR_BAD_HANDLE;
> goto done;
> }
>
> /* SA_AIS_ERR_INVALID_PARAM, bullet 1 in SAI-AIS-LOG-A.02.01
> Section 3.6.3, Return Values */
> if (lstr_hdl_rec->log_header_type != logRecord->logHdrType) {
> - TRACE("lstr_hdl_rec->log_header_type != logRecord->logHdrType");
> - ncshm_give_hdl(logStreamHandle);
> - rc = SA_AIS_ERR_INVALID_PARAM;
> - goto done;
> + TRACE("%s: lstr_hdl_rec->log_header_type !=
> logRecord->logHdrType",
> + __FUNCTION__);
> + ais_rc = SA_AIS_ERR_INVALID_PARAM;
> + goto done_give_hdl_stream;
> }
>
> - /* retrieve the lga client hdl record */
> + /* retrieve the lga client hdl record
> + * From now on we must give both stream and client (parent) handle
> + * before return
> + */
> hdl_rec = ncshm_take_hdl(NCS_SERVICE_ID_LGA,
> lstr_hdl_rec->parent_hdl->local_hdl);
> if (hdl_rec == NULL) {
> - TRACE("ncshm_take_hdl logHandle FAILED!");
> - ncshm_give_hdl(logStreamHandle);
> - rc = SA_AIS_ERR_LIBRARY;
> - goto done;
> + TRACE("%s: ncshm_take_hdl logHandle FAILED!",
> + __FUNCTION__);
> + ais_rc = SA_AIS_ERR_LIBRARY;
> + goto done_give_hdl_stream;
> }
>
> if ((hdl_rec->reg_cbk.saLogWriteLogCallback == NULL) && (ackFlags ==
> SA_LOG_RECORD_WRITE_ACK)) {
> - TRACE("Write Callback not registered");
> - ncshm_give_hdl(logStreamHandle);
> - ncshm_give_hdl(hdl_rec->local_hdl);
> - rc = SA_AIS_ERR_INIT;
> - goto done;
> + TRACE("%s: Write Callback not registered", __FUNCTION__);
> + ais_rc = SA_AIS_ERR_INIT;
> + goto done_give_hdl_all;
> }
>
> + /***
> + * Handle states
> + * Synchronize with mds and recovery thread (mutex)
> + */
> + pthread_mutex_lock(&lga_cb.cb_lock);
> + lgs_state_t lgs_state = lga_cb.lgs_state;
> + lga_state_t lga_state = lga_cb.lga_state;
> + pthread_mutex_unlock(&lga_cb.cb_lock);
>
> - /* Check Whether LGS is up or not */
> - if (!lga_cb.lgs_up) {
> - ncshm_give_hdl(logStreamHandle);
> - ncshm_give_hdl(hdl_rec->local_hdl);
> - TRACE("LGS down");
> - rc = SA_AIS_ERR_TRY_AGAIN;
> - goto done;
> + if (lgs_state == LGS_NO_ACTIVE) {
> + /* We have a server but it is temporarily unavailable.
> + * Client may try again
> + */
> + TRACE("%s: LGS no active", __FUNCTION__);
> + ais_rc = SA_AIS_ERR_TRY_AGAIN;
> + goto done_give_hdl_all;
> + }
> +
> + if (lga_state == LGA_NO_SERVER) {
> + /* We have no server and cannot write. The client may try again
> + */
> + TRACE("\t LGA_NO_SERVER");
> + ais_rc = SA_AIS_ERR_TRY_AGAIN;
> + goto done_give_hdl_all;
> + }
> +
> + if (lga_state == LGA_RECOVERY2) {
> + /* Auto recovery is ongoing. We have to wait for it to finish.
> + * The client may try again
> + */
> + TRACE("\t LGA_RECOVERY2");
> + ais_rc = SA_AIS_ERR_TRY_AGAIN;
> + goto done_give_hdl_all;
> + }
> +
> + if (lga_state == LGA_RECOVERY1) {
> + /* We are in recovery 1 state.
> + * Recover client and execute the request
> + */
> + TRACE("\t LGA_RECOVERY1");
> + rc = recover_one_client(hdl_rec);
> + if (rc == -1) {
> + /* Client could not be recovered. Delete client and
> + * return BAD HANDLE
> + */
> + TRACE("\t recover_one_client Fail");
> + /* The log stream handle is not released in
> + * delete_one_client() so we have to do it here.
> + * The function is used in other APIs that do not
> + * take a log stream handle
> + */
> + ncshm_give_hdl(logStreamHandle);
> + (void) delete_one_client(&lga_cb.client_list, hdl_rec);
> + ais_rc = SA_AIS_ERR_BAD_HANDLE;
> + /* Handles are destroyed so we shall not give handles */
> + goto done;
> + }
> }
>
> /** populate the mds message to send across to the LGS
> @@ -955,18 +1236,20 @@ SaAisErrorT saLogWriteLogAsync(SaLogStre
> /** Send the message out to the LGS
> **/
> if (NCSCC_RC_SUCCESS != lga_mds_msg_async_send(&lga_cb, &msg,
> MDS_SEND_PRIORITY_MEDIUM))
> - rc = SA_AIS_ERR_TRY_AGAIN;
> + ais_rc = SA_AIS_ERR_TRY_AGAIN;
>
> - /** Give all the handles that were taken **/
> - ncshm_give_hdl(lstr_hdl_rec->log_stream_hdl);
> + done_give_hdl_all:
> ncshm_give_hdl(hdl_rec->local_hdl);
> + done_give_hdl_stream:
> + ncshm_give_hdl(logStreamHandle);
>
> - done:
> +done:
> TRACE_LEAVE();
> - return rc;
> + return ais_rc;
> }
>
> /**
> + * API function for closing stream
> *
> * @param logStreamHandle
> *
> @@ -977,34 +1260,85 @@ SaAisErrorT saLogStreamClose(SaLogStream
> lga_log_stream_hdl_rec_t *lstr_hdl_rec;
> lga_client_hdl_rec_t *hdl_rec;
> lgsv_msg_t msg, *o_msg = NULL;
> - SaAisErrorT rc = SA_AIS_OK;
> + SaAisErrorT ais_rc = SA_AIS_OK;
> uint32_t mds_rc;
> + int rc;
>
> TRACE_ENTER();
>
> lstr_hdl_rec = ncshm_take_hdl(NCS_SERVICE_ID_LGA, logStreamHandle);
> if (lstr_hdl_rec == NULL) {
> TRACE("ncshm_take_hdl logStreamHandle ");
> - rc = SA_AIS_ERR_BAD_HANDLE;
> + ais_rc = SA_AIS_ERR_BAD_HANDLE;
> goto done;
> }
>
> + /***
> + * Handle states
> + * Synchronize with mds and recovery thread (mutex)
> + */
> + pthread_mutex_lock(&lga_cb.cb_lock);
> + lgs_state_t lgs_state = lga_cb.lgs_state;
> + lga_state_t lga_state = lga_cb.lga_state;
> + pthread_mutex_unlock(&lga_cb.cb_lock);
> +
> + if (lgs_state == LGS_NO_ACTIVE) {
> + /* We have a server but it is temporarily unavailable. Client
> may
> + * try again
> + */
> + TRACE("%s LGS no active", __FUNCTION__);
> + ais_rc = SA_AIS_ERR_TRY_AGAIN;
> + goto done_give_hdl_stream;
> + }
> +
> + if (lga_state == LGA_NO_SERVER) {
> + /* We have no server and cannot write. The client may try again
> + */
> + TRACE("%s No server", __FUNCTION__);
> + ais_rc = SA_AIS_ERR_TRY_AGAIN;
> + goto done_give_hdl_stream;
> + }
> +
> + if (lga_state == LGA_RECOVERY2) {
> + /* Auto recovery is ongoing. We have to wait for it to finish.
> + * The client may try again
> + */
> + TRACE("%s LGA auto recovery ongoing (2)", __FUNCTION__);
> + ais_rc = SA_AIS_ERR_TRY_AGAIN;
> + goto done_give_hdl_stream;
> + }
> +
> /* retrieve the client hdl record */
> hdl_rec = ncshm_take_hdl(NCS_SERVICE_ID_LGA,
> lstr_hdl_rec->parent_hdl->local_hdl);
> if (hdl_rec == NULL) {
> TRACE("ncshm_take_hdl logHandle ");
> - rc = SA_AIS_ERR_LIBRARY;
> + ais_rc = SA_AIS_ERR_LIBRARY;
> goto done_give_hdl_stream;
> }
>
> - /* Check Whether LGS is up or not */
> - if (!lga_cb.lgs_up) {
> - TRACE("LGS is down");
> - rc = SA_AIS_ERR_TRY_AGAIN;
> - goto done_give_hdl_all;
> + if (lga_state == LGA_RECOVERY1) {
> + /* We are in recovery 1 state.
> + * Recover client and execute the request
> + */
> + rc = recover_one_client(hdl_rec);
> + if (rc == -1) {
> + /* Client could not be recovered. Delete client and
> + * return BAD HANDLE
> + */
> + TRACE("%s Recover Fail delete_one_client",
> __FUNCTION__);
> + /* The log stream handle is not released in
> + * delete_one_client() so we have to do it here.
> + * The function is used in other APIs that do not
> + * take a log stream handle
> + */
> + ncshm_give_hdl(logStreamHandle);
> + (void) delete_one_client(&lga_cb.client_list, hdl_rec);
> + ais_rc = SA_AIS_ERR_BAD_HANDLE;
> + /* Handles are destroyed so we shall not give handles */
> + goto done;
> + }
> }
>
> -
> /** Populate a MDS message to send to the LGS for a channel
> * close operation.
> **/
> @@ -1018,22 +1352,22 @@ SaAisErrorT saLogStreamClose(SaLogStream
> case NCSCC_RC_SUCCESS:
> break;
> case NCSCC_RC_REQ_TIMOUT:
> - rc = SA_AIS_ERR_TIMEOUT;
> - TRACE("lga_mds_msg_sync_send FAILED: %u", rc);
> + ais_rc = SA_AIS_ERR_TIMEOUT;
> + TRACE("lga_mds_msg_sync_send FAILED: %s", saf_error(ais_rc));
> goto done_give_hdl_all;
> default:
> - TRACE("lga_mds_msg_sync_send FAILED: %u", rc);
> - rc = SA_AIS_ERR_NO_RESOURCES;
> + TRACE("lga_mds_msg_sync_send FAILED: %s", saf_error(ais_rc));
> + ais_rc = SA_AIS_ERR_NO_RESOURCES;
> goto done_give_hdl_all;
> }
>
> if (o_msg != NULL) {
> - rc = o_msg->info.api_resp_info.rc;
> + ais_rc = o_msg->info.api_resp_info.rc;
> lga_msg_destroy(o_msg);
> } else
> - rc = SA_AIS_ERR_NO_RESOURCES;
> + ais_rc = SA_AIS_ERR_NO_RESOURCES;
>
> - if (rc == SA_AIS_OK) {
> + if (ais_rc == SA_AIS_OK) {
> pthread_mutex_lock(&lga_cb.cb_lock);
>
> /** Delete this log stream & the associated resources with this
> @@ -1041,7 +1375,7 @@ SaAisErrorT saLogStreamClose(SaLogStream
> **/
> if (NCSCC_RC_SUCCESS !=
> lga_log_stream_hdl_rec_del(&hdl_rec->stream_list, lstr_hdl_rec)) {
> TRACE("Unable to delete log stream");
> - rc = SA_AIS_ERR_LIBRARY;
> + ais_rc = SA_AIS_ERR_LIBRARY;
> }
>
> pthread_mutex_unlock(&lga_cb.cb_lock);
> @@ -1054,7 +1388,7 @@ SaAisErrorT saLogStreamClose(SaLogStream
>
> done:
> TRACE_LEAVE();
> - return rc;
> + return ais_rc;
> }
>
> /**
> diff --git a/osaf/libs/agents/saf/lga/lga_mds.c
> b/osaf/libs/agents/saf/lga/lga_mds.c
> --- a/osaf/libs/agents/saf/lga/lga_mds.c
> +++ b/osaf/libs/agents/saf/lga/lga_mds.c
> @@ -17,6 +17,7 @@
>
> #include <stdlib.h>
> #include "lga.h"
> +#include "lga_state.h"
>
> static MDS_CLIENT_MSG_FORMAT_VER
> LGA_WRT_LGS_MSG_FMT_ARRAY[LGA_WRT_LGS_SUBPART_VER_RANGE] = {
> @@ -478,38 +479,56 @@ static uint32_t lga_lgs_msg_proc(lga_cb_
>
> ******************************************************************************/
> static uint32_t lga_mds_svc_evt(struct ncsmds_callback_info
> *mds_cb_info)
> {
> - TRACE_2("LGA Rcvd MDS subscribe evt from svc %d \n",
> mds_cb_info->info.svc_evt.i_svc_id);
> + TRACE_ENTER();
>
> switch (mds_cb_info->info.svc_evt.i_change) {
> case NCSMDS_NO_ACTIVE:
> + TRACE("%s\t NCSMDS_NO_ACTIVE", __FUNCTION__);
> + /* This is a temporary server down e.g. during switch/fail
> over*/
> + if (mds_cb_info->info.svc_evt.i_svc_id == NCSMDS_SVC_ID_LGS) {
> + pthread_mutex_lock(&lga_cb.cb_lock);
> + TRACE("NCSMDS_NO_ACTIVE");
> +
> + memset(&lga_cb.lgs_mds_dest, 0, sizeof(MDS_DEST));
> + lga_cb.lgs_state = LGS_NO_ACTIVE;
> + pthread_mutex_unlock(&lga_cb.cb_lock);
> + }
> + break;
> case NCSMDS_DOWN:
> + TRACE("%s\t NCSMDS_DOWN", __FUNCTION__);
> + /* This may be a loss of server where all client and stream
> information
> + * is lost on server side. In this situation client info in
> agent
> is
> + * no longer valid and clients must register again (initialize)
> + */
> if (mds_cb_info->info.svc_evt.i_svc_id == NCSMDS_SVC_ID_LGS) {
> - /** TBD what to do if LGS goes down
> - ** Hold on to the subscription if possible
> - ** to send them out if LGS comes back up
> - **/
> + pthread_mutex_lock(&lga_cb.cb_lock);
> TRACE("LGS down");
> - pthread_mutex_lock(&lga_cb.cb_lock);
> memset(&lga_cb.lgs_mds_dest, 0, sizeof(MDS_DEST));
> - lga_cb.lgs_up = 0;
> + lga_cb.lgs_state = LGS_DOWN;
> pthread_mutex_unlock(&lga_cb.cb_lock);
> +
> + /* The log server is lost */
> + lga_no_server_state_set();
> }
> break;
> case NCSMDS_NEW_ACTIVE:
> case NCSMDS_UP:
> switch (mds_cb_info->info.svc_evt.i_svc_id) {
> case NCSMDS_SVC_ID_LGS:
> + TRACE("%s\t NCSMDS_UP" , __FUNCTION__);
> /** Store the MDS DEST of the LGS
> **/
> - TRACE_2("MSG from LGS NCSMDS_NEW_ACTIVE/UP");
> pthread_mutex_lock(&lga_cb.cb_lock);
> lga_cb.lgs_mds_dest = mds_cb_info->info.svc_evt.i_dest;
> - lga_cb.lgs_up = 1;
> + lga_cb.lgs_state = LGS_UP;
> if (lga_cb.lgs_sync_awaited) {
> /* signal waiting thread */
> m_NCS_SEL_OBJ_IND(&lga_cb.lgs_sync_sel);
> }
> pthread_mutex_unlock(&lga_cb.cb_lock);
> +
> + /* The log server is up */
> + lga_serv_recov1state_set();
> break;
> default:
> break;
> @@ -519,6 +538,7 @@ static uint32_t lga_mds_svc_evt(struct n
> break;
> }
>
> + TRACE_LEAVE();
> return NCSCC_RC_SUCCESS;
> }
>
> @@ -1141,8 +1161,9 @@ uint32_t lga_mds_msg_sync_send(lga_cb_t
> /* Retrieve the response and take ownership of the memory */
> *o_msg = (lgsv_msg_t *)mds_info.info.svc_send.info.sndrsp.o_rsp;
> mds_info.info.svc_send.info.sndrsp.o_rsp = NULL;
> - } else
> + } else {
> TRACE("lga_mds_msg_sync_send FAILED: %u", rc);
> + }
>
> TRACE_LEAVE();
> return rc;
> @@ -1184,8 +1205,9 @@ uint32_t lga_mds_msg_async_send(lga_cb_t
>
> /* send the message */
> rc = ncsmds_api(&mds_info);
> - if (rc != NCSCC_RC_SUCCESS)
> - TRACE("failed");
> + if (rc != NCSCC_RC_SUCCESS) {
> + TRACE("%s: async send failed", __FUNCTION__);
> + }
>
> TRACE_LEAVE();
> return rc;
> diff --git a/osaf/libs/agents/saf/lga/lga_state.c
> b/osaf/libs/agents/saf/lga/lga_state.c
> new file mode 100644
> --- /dev/null
> +++ b/osaf/libs/agents/saf/lga/lga_state.c
> @@ -0,0 +1,670 @@
> +/* -*- OpenSAF -*-
> + *
> + * (C) Copyright 2016 The OpenSAF Foundation
> + *
> + * This program is distributed in the hope that it will be useful,
> but
> + * WITHOUT ANY WARRANTY; without even the implied warranty of
> MERCHANTABILITY
> + * or FITNESS FOR A PARTICULAR PURPOSE. This file and program are
> licensed
> + * under the GNU Lesser General Public License Version 2.1, February
> 1999.
> + * The complete license can be accessed from the following location:
> + * http://opensource.org/licenses/lgpl-license.php
> + * See the Copying file included with the OpenSAF distribution for
> full
> + * licensing terms.
> + *
> + * Author(s): Ericsson AB
> + *
> + */
> +
> +#include "lga_state.h"
> +#include <stdlib.h>
> +#include <syslog.h>
> +#include <pthread.h>
> +#include <osaf_poll.h>
> +#include <ncs_osprm.h>
> +#include <osaf_time.h>
> +#include <saf_error.h>
> +
> +/***
> + * Common data
> + */
> +
> +/* Selection object for terminating recovery thread for state 2
> (after timeout)
> + * NOTE: Only for recovery2_thread
> + */
> +static NCS_SEL_OBJ state2_terminate_sel_obj;
> +
> +static pthread_t recovery2_thread_id = 0;
> +static pthread_mutex_t lga_recov_mutex = PTHREAD_MUTEX_INITIALIZER;
> +
> +/******************************************************************************
> + * Functions used with server down recovery handling
> +
> ******************************************************************************/
> +
> +static int start_recovery2_thread(void);
> +
> +/******************************************************************************
> + * Functions for sending messages to the server used in the recovery
> functions
> + * Handles TRY AGAIN
> +
> ******************************************************************************/
> +
> +/**
> + * Send an initialize message to the server. A number of retries is
> done if
> + * return code is TRY AGAIN
> + * The server returns a client id
> + *
> + * @param client_id[out]
> + * @return -1 on error (client id not valid)
> + */
> +static int send_initialize_msg(uint32_t *client_id)
> +{
> + SaVersionT version = {
> + .releaseCode = LOG_RELEASE_CODE,
> + .majorVersion = LOG_MAJOR_VERSION,
> + .minorVersion = LOG_MINOR_VERSION
> + };
> +
> + SaAisErrorT ais_rc = SA_AIS_ERR_TRY_AGAIN;
> + uint32_t ncs_rc = NCSCC_RC_SUCCESS;
> + int rc = 0;
> + lgsv_msg_t i_msg, *o_msg = NULL;
> + uint32_t try_again_cnt = 10;
> + const uint32_t sleep_delay_ms = 100;
> +
> + TRACE_ENTER();
> +
> + /* Populate the message to be sent to the LGS */
> + memset(&i_msg, 0, sizeof(lgsv_msg_t));
> + i_msg.type = LGSV_LGA_API_MSG;
> + i_msg.info.api_info.type = LGSV_INITIALIZE_REQ;
> + i_msg.info.api_info.param.init.version = version;
> +
> + /* Send a message to LGS to obtain a client_id
> + */
> + while(try_again_cnt > 0) {
> + ncs_rc = lga_mds_msg_sync_send(&lga_cb, &i_msg, &o_msg,
> + LGS_WAIT_TIME,MDS_SEND_PRIORITY_HIGH);
> + if (ncs_rc != NCSCC_RC_SUCCESS) {
> + LOG_NO("%s lga_mds_msg_sync_send() Fail %d",
> + __FUNCTION__, ncs_rc);
> + rc = -1;
> + goto done;
> + }
> +
> + ais_rc = o_msg->info.api_resp_info.rc;
> + if (ais_rc == SA_AIS_ERR_TRY_AGAIN) {
> + usleep(sleep_delay_ms * 1000);
> + } else {
> + break;
> + }
> + try_again_cnt--;
> + }
> + if (SA_AIS_OK != ais_rc) {
> + TRACE("%s LGS error response %s", __FUNCTION__,
> saf_error(ais_rc));
> + rc = -1;
> + goto free_done;
> + }
> +
> + /* Initialize succeeded */
> + *client_id = o_msg->info.api_resp_info.param.init_rsp.client_id;
> +
> +free_done:
> + /* Free up the response message */
> + lga_msg_destroy(o_msg);
> +
> +done:
> + TRACE_LEAVE2("rc = %d", rc);
> + return rc;
> +}
> +
> +/**
> + * Send an open message to the server. A number of retries is done
> if
> + * return code is TRY AGAIN
> + * The server returns client id and a stream id
> + *
> + * @param lstream_id[out] Log stream id received from server
> + * @param p_stream[in] Pointer to a stream record
> + * @return -1 on error (stream id not valid)
> + */
> +static int send_stream_open_msg(uint32_t *lstream_id,
> + lga_log_stream_hdl_rec_t *p_stream)
> +{
> + SaAisErrorT ais_rc = SA_AIS_OK;
> + uint32_t ncs_rc = NCSCC_RC_SUCCESS;
> + int rc = 0;
> + lgsv_msg_t i_msg, *o_msg;
> + lgsv_stream_open_req_t *open_param;
> + lga_client_hdl_rec_t *p_client = p_stream->parent_hdl;
> + uint32_t try_again_cnt = 10;
> + const uint32_t sleep_delay_ms = 100;
> +
> + TRACE_ENTER();
> + TRACE("\t log_stream_name \"%s\", lgs_client_id=%d",
> + p_stream->log_stream_name.value, p_client->lgs_client_id);
> +
> + /* Populate a stream open message to the LGS
> + */
> + memset(&i_msg, 0, sizeof(lgsv_msg_t));
> + open_param = &i_msg.info.api_info.param.lstr_open_sync;
> +
> + /* Set the open parameters to open a stream for recovery */
> + open_param->client_id = p_client->lgs_client_id;
> + open_param->lstr_name = p_stream->log_stream_name;
> + open_param->logFileFmt = NULL;
> + open_param->logFileFmtLength = 0;
> + open_param->maxLogFileSize = 0;
> + open_param->maxLogRecordSize = 0;
> + open_param->haProperty = SA_FALSE;
> + open_param->logFileFullAction = 0;
> + open_param->maxFilesRotated = 0;
> + open_param->lstr_open_flags = 0;
> +
> + i_msg.type = LGSV_LGA_API_MSG;
> + i_msg.info.api_info.type = LGSV_STREAM_OPEN_REQ;
> +
> + /* Send a message to LGS to obtain a stream_id
> + */
> + while(try_again_cnt > 0) {
> + ncs_rc = lga_mds_msg_sync_send(&lga_cb, &i_msg, &o_msg,
> + LGS_WAIT_TIME, MDS_SEND_PRIORITY_HIGH);
> + if (ncs_rc != NCSCC_RC_SUCCESS) {
> + rc = -1;
> + TRACE("%s lga_mds_msg_sync_send() Fail %d",
> __FUNCTION__,
> ncs_rc);
> + goto done;
> + }
> +
> + ais_rc = o_msg->info.api_resp_info.rc;
> + if (ais_rc == SA_AIS_ERR_TRY_AGAIN) {
> + usleep(sleep_delay_ms * 1000);
> + } else {
> + break;
> + }
> + try_again_cnt--;
> + }
> + if (SA_AIS_OK != ais_rc) {
> + TRACE("%s LGS error response %s", __FUNCTION__,
> saf_error(ais_rc));
> + rc = -1;
> + goto free_done;
> + }
> +
> + /* Open succeeded */
> + *lstream_id =
> o_msg->info.api_resp_info.param.lstr_open_rsp.lstr_id;
> +
> +free_done:
> + /* Free up the response message */
> + lga_msg_destroy(o_msg);
> +
> +done:
> + TRACE_LEAVE2("rc = %d", rc);
> + return rc;
> +}
> +
> +/******************************************************************************
> + * Recovery functions
> +
> ******************************************************************************/
> +
> +/**
> + * Register an existing client with the server
> + * - Send an initialize request to the server
> + * - A new client id is received replacing the old no longer valid
> one
> + *
> + * This function shall only be used in recovery. It is assumed that
> + * lga_no_server_state_set() has been called
> + *
> + * @param p_client[in] Pointer to a client record
> + * @return -1 on error
> + * 0 client is successfully initialized
> + * 1 client was already initialized
> + */
> +static int initialize_one_client(lga_client_hdl_rec_t *p_client)
> +{
> + int rc = 0;
> + uint32_t client_id;
> +
> + TRACE_ENTER();
> +
> + if (p_client->initialized_flag == true) {
> + /* The client is already initialized */
> + rc = 1;
> + goto done;
> + }
> +
> + rc = send_initialize_msg(&client_id);
> + if (rc == -1) {
> + TRACE("%s initialize_msg_send Fail", __FUNCTION__);
> + }
> +
> + /* Restore the client Id with the one returned by the LGS and
> + * set the initialized flag
> + */
> + osaf_mutex_lock_ordie(&lga_cb.cb_lock);
> + p_client->lgs_client_id = client_id;
> + p_client->initialized_flag = true;
> + osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
> +
> +done:
> + TRACE_LEAVE2("rc = %d", rc);
> + return rc;
> +}
> +
> +/**
> + * Register a stream that was opened by the client with the server
> + * - Send a stream open request to the server (no parameter list)
> + * - A new stream id is received replacing the old no longer valid
> one
> + *
> + * @param p_stream[in] Pointer to a stream record
> + * @return -1 on error
> + * 0 The stream was successfully recovered
> + * 1 The stream was already recovered
> + */
> +static int recover_one_stream(lga_log_stream_hdl_rec_t *p_stream)
> +{
> + int rc = 0;
> + uint32_t stream_id;
> +
> + TRACE_ENTER();
> +
> + if (p_stream->recovered_flag == true) {
> + /* The stream is already recovered */
> + rc = 1;
> + goto done;
> + }
> +
> + rc = send_stream_open_msg(&stream_id, p_stream);
> + if (rc == -1) {
> + TRACE("%s open_stream_msg_send Fail", __FUNCTION__);
> + goto done;
> + }
> +
> + /* Restore the stream Id with the Id returned by the LGS and
> + * set the recovered flag
> + */
> + osaf_mutex_lock_ordie(&lga_cb.cb_lock);
> + p_stream->lgs_log_stream_id = stream_id;
> + p_stream->recovered_flag = true;
> + osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
> +
> +done:
> + TRACE_LEAVE2("rc = %d", rc);
> + return rc;
> +}
> +
> +/******************************************************************************
> + * Recovery 2 thread handling functions
> +
> ******************************************************************************/
> +
> +/**
> + * Thread for handling recovery step 2
> + * Wait for timeout or stop request
> + * After timeout, recover all not already recovered clients
> + *
> + * @param dummy Not used
> + * @return NULL
> + */
> +static void *recovery2_thread(void *dummy)
> +{
> + int rc = 0;
> + struct timespec seed_ts;
> + int timeout_ms;
> +
> + TRACE_ENTER();
> +
> + /* Create a random timeout time in msec within an interval
> + */
> + /* Set seed. Use nanoseconds from clock */
> + osaf_clock_gettime(CLOCK_MONOTONIC, &seed_ts);
> + srandom((unsigned int) seed_ts.tv_nsec);
> + /* Interval 400 - 500 sec */
> + timeout_ms = (int) (random() % 100 + 400) * 1000;
> +
> + /* Wait for timeout or a signal to terminate
> + */
> + rc = osaf_poll_one_fd(state2_terminate_sel_obj.rmv_obj,
> timeout_ms);
> + if (rc == -1) {
> + TRACE("%s osaf_poll_one_fd Fail %s", __FUNCTION__,
> strerror(errno));
> + goto done;
> + }
> +
> + if (rc == 0) {
> + /* Timeout; Set recovery state 2 */
> + osaf_mutex_lock_ordie(&lga_cb.cb_lock);
> + lga_cb.lga_state = LGA_RECOVERY2;
> + osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
> + TRACE("%s Poll timeout. Enter LGA_RECOVERY2 state",
> __FUNCTION__);
> + } else {
> + /* Stop signal received */
> + TRACE("%s Stop signal received", __FUNCTION__);
> + goto done;
> + }
> +
> + /* Execute recovery 2
> + * Recover one client at a time
> + * If fail to recover remove the client. This means that the client
> + * handle is invalidated
> + * When all clients are recovered or if exit is requested exit the
> + * thread
> + */
> + TRACE("%s Execute recovery 2", __FUNCTION__);
> + /* First client */
> + lga_client_hdl_rec_t *p_client;
> + p_client = lga_cb.client_list;
> +
> + while (p_client != NULL) {
> + /* Exit if requested to */
> + rc = osaf_poll_one_fd(state2_terminate_sel_obj.rmv_obj, 0);
> + if (rc > 0) {
> + /* We have been signaled to exit */
> + goto done;
> + }
> + /* Recover clients one at a time */
> + rc = recover_one_client(p_client);
> + TRACE("\t Client %d is recovered", p_client->lgs_client_id);
> + if (rc == -1) {
> + TRACE("%s recover_one_client Fail Deleting cllient (id
> %d)",
> + __FUNCTION__, p_client->lgs_client_id);
> + /* Fail to recover this client
> + * Remove (handle invalidated)
> + */
> + osaf_mutex_lock_ordie(&lga_cb.cb_lock);
> + (void) delete_one_client(&lga_cb.client_list, p_client);
> + osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
> + }
> +
> + /* Next client */
> + p_client = p_client->next;
> + }
> +
> + /* All clients are recovered (or removed).
> + * Or recovery is aborted
> + * Change to not recovering state
> + * LGA_NORMAL
> + */
> + osaf_mutex_lock_ordie(&lga_cb.cb_lock);
> + lga_cb.lga_state = LGA_NORMAL;
> + TRACE("\t Setting lga_state = LGA_NORMAL");
> + osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
> +
> +done:
> + /* Cleanup and Exit thread */
> + (void) ncs_sel_obj_destroy(&state2_terminate_sel_obj);
> +
> + TRACE_LEAVE();
> + return NULL;
> +}
> +
> +/**
> + * Setup and start the thread for recovery state 2
> + *
> + * @return -1 on error
> + */
> +static int start_recovery2_thread(void)
> +{
> + int rc = 0;
> + uint32_t ncs_rc = NCSCC_RC_SUCCESS;
> + pthread_attr_t attr;
> +
> + TRACE_ENTER();
> +
> + pthread_attr_init(&attr);
> + pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
> +
> + /* Create a selection object for signaling the recovery2 thread */
> + ncs_rc = ncs_sel_obj_create(&state2_terminate_sel_obj);
> + if (ncs_rc != NCSCC_RC_SUCCESS) {
> + TRACE("%s ncs_sel_obj_create Fail", __FUNCTION__);
> + rc = -1;
> + goto done;
> + }
> +
> + /* Create the thread
> + */
> + if (pthread_create(&recovery2_thread_id, &attr, recovery2_thread,
> NULL) != 0) {
> + /* pthread_create() error handling */
> + TRACE("\t pthread_create FAILED: %s", strerror(errno));
> + rc = -1;
> + (void) ncs_sel_obj_destroy(&state2_terminate_sel_obj);
> + goto done;
> + }
> + pthread_attr_destroy(&attr);
> +
> +done:
> + TRACE_LEAVE2("rc = %d", rc);
> + return rc;
> +}
> +
> +/**
> + * Stops the recovery 2 thread
> + * It is safe to call this function also if the thread is not
> running
> + */
> +static void stop_recovery2_thread(void)
> +{
> + uint32_t ncs_rc = 0;
> +
> + TRACE_ENTER();
> +
> + /* Check if the thread is running */
> + osaf_mutex_lock_ordie(&lga_cb.cb_lock);
> + lga_state_t lga_state = lga_cb.lga_state;
> + osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
> +
> + if (lga_state == LGA_NORMAL) {
> + /* No thread to stop */
> + TRACE("%s LGA_NORMAL no thread to stop", __FUNCTION__);
> + goto done;
> + }
> +
> + /* Signal the thread to stop */
> + ncs_rc = ncs_sel_obj_ind(&state2_terminate_sel_obj);
> + if (ncs_rc != NCSCC_RC_SUCCESS) {
> + TRACE("%s ncs_sel_obj_ind Fail", __FUNCTION__);
> + }
> +
> + done:
> + TRACE_LEAVE();
> + return;
> +}
> +
> +/******************************************************************************
> + * Recovery state handling functions
> +
> ******************************************************************************/
> +
> +/******************************************************************************
> + * Server Down
> + *
> + * Initiate recovery handling and set LGA_NO_SERVER state
> + * LGA_NO_SERVER: State set in MDS event handler when server down
> event
> + * lga_no_server_state_set()
> +
> ******************************************************************************/
> +
> +/**
> + * Recovery state LGA_NO_SERVER
> + *
> + * Setup server down state
> + * - Mark all clients and their streams as not recovered
> + * - Remove recovery thread if exist
> + * - Set LGA_NO_SERVER state
> + *
> + */
> +void lga_no_server_state_set(void)
> +{
> + lga_client_hdl_rec_t *p_client = lga_cb.client_list;
> + lga_log_stream_hdl_rec_t *p_stream;
> +
> + TRACE_ENTER();
> +
> + /* Stop recovery thread for state 2 if exist */
> + stop_recovery2_thread();
> +
> + /* Set LGA_NO_SERVER state */
> + osaf_mutex_lock_ordie(&lga_cb.cb_lock);
> + lga_cb.lga_state = LGA_NO_SERVER;
> + TRACE("\t lga_state = LGA_NO_SERVER");
> + osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
> +
> + while (p_client != NULL) {
> + /* Set Client flags for all clients */
> + p_client->initialized_flag = false;
> + p_client->recovered_flag = false;
> +
> + /* Set stream flags for all streams in every client */
> + p_stream = p_client->stream_list;
> + while (p_stream != NULL) {
> + p_stream->recovered_flag = false;
> +
> + p_stream = p_stream->next;
> + }
> +
> + p_client = p_client->next;
> + }
> +
> + TRACE_LEAVE();
> +}
> +
> +/******************************************************************************
> + * Recovery state 1
> + *
> + * Recover when needed. This means that when a client requests a
> service
> + * the client and all its open streams are recovered in the server.
> + * Handler functions for this can be found in this section.
> + * LGA_RECOVERY1: State set in MDS event handler when server up
> event
> + * lga_serv_recov1state_set(). Starting recovery
> thread
> + *
> + * LGA_RECOVERY2: State set in recovery thread after timeout
> + * lga_serv_recov2state_set()
> + *
> + * LGA_NORMAL: State set when all clients are recovered. This is
> done in
> + * recovery thread when done. After setting this state
> the
> + * thread will exit
> +
> ******************************************************************************/
> +
> +/**
> + * If previous state was LGA_NO_SERVER (headless)
> + * Start the recovery2_thread. This will start timeout timer for
> recovery 2
> + * state.
> + * Set state to LGA_RECOVERY1
> + */
> +void lga_serv_recov1state_set(void)
> +{
> + TRACE_ENTER();
> +
> + osaf_mutex_lock_ordie(&lga_cb.cb_lock);
> + if (lga_cb.lga_state != LGA_NO_SERVER) {
> + /* We have not been headless. No recovery shall be done */
> + TRACE("%s Previous state was not LGA_NO_SERVER lga_stat = %d",
> + __FUNCTION__, lga_cb.lga_state);
> + osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
> + goto done;
> + } else {
> + lga_cb.lga_state = LGA_RECOVERY1;
> + TRACE("lga_state = %d (2->RECOVERY1)",
> + lga_cb.lga_state);
> + osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
> + }
> +
> + start_recovery2_thread();
> +
> +done:
> + TRACE_LEAVE();
> + return;
> +}
> +
> +/**
> + * Recover the client and all streams in its stream list
> + * - Send an initialize request to the server
> + * - Send a stream open request with no parameters to the server
> + * for each stream.
> + * - If success mark the stream and client as recovered
> + * - If fail to initiate the client or recover one of its streams
> the
> + * client shall be finalized. A finalize request is sent to the
> server.
> + * If error return code from server it is ignored. This may happen
> e.g. if
> + * the client was never initialized.
> + *
> + * The reason for finalizing the whole client if only one stream
> cannot be
> + * recovered is that the client will get BAD_HANDLE when requesting a
> service
> + * for that stream. This may cause the client to initialize a new
> client and
> + * the old client will remain as a resource leak.
> + *
> + * @param p_client[in] Pointer to a client record
> + * @return -1 on error
> + */
> +int recover_one_client(lga_client_hdl_rec_t *p_client)
> +{
> + int rc = 0;
> + lga_log_stream_hdl_rec_t *p_stream;
> +
> + TRACE_ENTER();
> +
> + /* This function may be called both in the recovery thread and in
> the
> + * client thread. A possible scenario is that the recovery 2 thread
> + * times out and calls this function in recovery mode 2 while a
> + * client recovery started in mode 1 is still ongoing. The
> recovery 2
> + * thread must wait until ongoing recovery is done
> + */
> + osaf_mutex_lock_ordie(&lga_recov_mutex);
> + /* We may have been waiting at mutex while the client was recovered
> + * so it may already have been recovered.
> + */
> + if (p_client->recovered_flag == true) {
> + /* Client is already recovered */
> + TRACE("\t Already recovered");
> + goto done;
> + }
> +
> + rc = initialize_one_client(p_client);
> + if (rc == -1) {
> + TRACE("%s initialize_one_client() Fail client Id %d",
> + __FUNCTION__, p_client->lgs_client_id);
> + goto done;
> + }
> +
> + /* Recover all streams registered with this client */
> + p_stream = p_client->stream_list;
> + while (p_stream != NULL) {
> + TRACE("\t Recover client=%d, stream=%d",
> + p_client->lgs_client_id, p_stream->lgs_log_stream_id);
> + rc = recover_one_stream(p_stream);
> + if (rc == -1) {
> + TRACE("%s recover_one_stream() Fail "
> + "client Id %d stream Id %d", __FUNCTION__,
> + p_client->lgs_client_id,
> + p_stream->lgs_log_stream_id);
> + goto done;
> + }
> +
> + p_stream = p_stream->next;
> + }
> +
> + osaf_mutex_lock_ordie(&lga_cb.cb_lock);
> + p_client->recovered_flag = true;
> + osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
> +
> +done:
> + osaf_mutex_unlock_ordie(&lga_recov_mutex);
> +
> + TRACE_LEAVE();
> + return rc;
> +}
> +
> +/**
> + * Delete one client
> + * Wrapper for function lga_hdl_rec_del() is used (lga_util.c)
> + * This wrapper adds a control to make sure that the function cannot
> + * be used by the recovery 2 thread and the client thread at the same
> time
> + *
> + * @param list_head
> + * @param rm_node
> + */
> +uint32_t delete_one_client(
> + lga_client_hdl_rec_t **list_head,
> + lga_client_hdl_rec_t *rm_node
> + )
> +{
> + TRACE_ENTER2();
> + uint32_t ncs_rc;
> +
> + osaf_mutex_lock_ordie(&lga_recov_mutex);
> + ncs_rc = lga_hdl_rec_del(list_head, rm_node);
> + osaf_mutex_unlock_ordie(&lga_recov_mutex);
> +
> + TRACE_LEAVE();
> + return ncs_rc;
> +}
> diff --git a/osaf/libs/agents/saf/lga/lga_state.h
> b/osaf/libs/agents/saf/lga/lga_state.h
> new file mode 100644
> --- /dev/null
> +++ b/osaf/libs/agents/saf/lga/lga_state.h
> @@ -0,0 +1,41 @@
> +/* -*- OpenSAF -*-
> + *
> + * (C) Copyright 2016 The OpenSAF Foundation
> + *
> + * This program is distributed in the hope that it will be useful,
> but
> + * WITHOUT ANY WARRANTY; without even the implied warranty of
> MERCHANTABILITY
> + * or FITNESS FOR A PARTICULAR PURPOSE. This file and program are
> licensed
> + * under the GNU Lesser General Public License Version 2.1, February
> 1999.
> + * The complete license can be accessed from the following location:
> + * http://opensource.org/licenses/lgpl-license.php
> + * See the Copying file included with the OpenSAF distribution for
> full
> + * licensing terms.
> + *
> + * Author(s): Ericsson AB
> + *
> + */
> +
> +#ifndef LGA_STATE_H
> +#define LGA_STATE_H
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +#include "lga.h"
> +
> +/* Recovery functions */
> +void lga_no_server_state_set(void);
> +void lga_serv_recov1state_set(void);
> +int recover_one_client(lga_client_hdl_rec_t *p_client);
> +uint32_t delete_one_client(
> + lga_client_hdl_rec_t **list_head,
> + lga_client_hdl_rec_t *rm_node
> + );
> +
> +#ifdef __cplusplus
> +}
> +#endif
> +
> +#endif /* LGA_STATE_H */
> +
> diff --git a/osaf/libs/agents/saf/lga/lga_util.c
> b/osaf/libs/agents/saf/lga/lga_util.c
> --- a/osaf/libs/agents/saf/lga/lga_util.c
> +++ b/osaf/libs/agents/saf/lga/lga_util.c
> @@ -44,7 +44,11 @@ static unsigned int lga_create(void)
> }
>
> /* Block and wait for indication from MDS meaning LGS is up */
> - osaf_poll_one_fd(m_GET_FD_FROM_SEL_OBJ(lga_cb.lgs_sync_sel),
> 30000);
> +
> + /* #1179 Change timeout from 30 sec (30000) to 10 sec (10000)
> + * 30 sec is probably too long for a synchronous API function
> + */
> + osaf_poll_one_fd(m_GET_FD_FROM_SEL_OBJ(lga_cb.lgs_sync_sel),
> 10000);
>
> pthread_mutex_lock(&lga_cb.cb_lock);
> lga_cb.lgs_sync_awaited = 0;
> @@ -118,12 +122,16 @@ static bool lga_clear_mbx(NCSCONTEXT arg
> static void lga_log_stream_hdl_rec_list_del(lga_log_stream_hdl_rec_t
> **plstr_hdl)
> {
> lga_log_stream_hdl_rec_t *lstr_hdl;
> + TRACE_ENTER();
> while ((lstr_hdl = *plstr_hdl) != NULL) {
> *plstr_hdl = lstr_hdl->next;
> + TRACE("%s stream \"%s\", hdl = %d",__FUNCTION__,
> + lstr_hdl->log_stream_name.value,
> lstr_hdl->log_stream_hdl);
> ncshm_destroy_hdl(NCS_SERVICE_ID_LGA, lstr_hdl->log_stream_hdl);
> free(lstr_hdl);
> lstr_hdl = NULL;
> }
> + TRACE_LEAVE();
> }
>
>
> /****************************************************************************
> @@ -264,11 +272,13 @@ static uint32_t lga_hdl_cbk_dispatch_blo
> }
>
> /**
> - *
> + * Initiate the agent when first used.
> + * Start NCS service
> + * Register with MDS
> *
> * @return unsigned int
> */
> -unsigned int lga_startup(void)
> +unsigned int lga_startup(lga_cb_t *cb)
> {
> unsigned int rc = NCSCC_RC_SUCCESS;
> pthread_mutex_lock(&lga_lock);
> @@ -287,8 +297,14 @@ unsigned int lga_startup(void)
> if ((rc = lga_create()) != NCSCC_RC_SUCCESS) {
> ncs_agents_shutdown();
> goto done;
> - } else
> + } else {
> lga_use_count = 1;
> + }
> +
> + /* Agent has successfully been started including communication
> + * with server
> + */
> + cb->lga_state = LGA_NORMAL;
> }
>
> done:
> @@ -299,11 +315,18 @@ unsigned int lga_startup(void)
> }
>
> /**
> + * If called when only one (the last) client for this agent is in the
> client list, a
> + * complete 'shut down' of the agent is done.
> + * - Erase the clients list. Frees all memory including list of
> open
> + * streams.
> + * - Unregister with MDS
> + * - Shut down ncs agents
> *
> + * Global lga_use_count Contains number of registered clients
> *
> - * @return unsigned int
> + * @return unsigned int (always NCSCC_RC_SUCCESS)
> */
> -unsigned int lga_shutdown(void)
> +unsigned int lga_shutdown_after_last_client(void)
> {
> unsigned int rc = NCSCC_RC_SUCCESS;
>
> @@ -325,6 +348,37 @@ unsigned int lga_shutdown(void)
> return rc;
> }
>
> +/**
> + * Makes a forced shut down of the agent if there are registered
> clients
> + * Makes all handles invalid and frees all resources (memory, mds)
> + *
> + * Presumably used when there are clients, the log server is down and no
> other recovery is possible.
> + * - Erase the clients list. Frees all memory including list of
> open
> + * streams.
> + * - Unregister with MDS
> + * - Shut down ncs agents
> + * - Set lga_use_count to 0
> + *
> + * Global lga_use_count [in/out] = 0
> + *
> + * @return always NCSCC_RC_SUCCESS
> + */
> +unsigned int lga_force_shutdown(void)
> +{
> + unsigned int rc = NCSCC_RC_SUCCESS;
> + TRACE_ENTER();
> + pthread_mutex_lock(&lga_lock);
> + if (lga_use_count > 0) {
> + lga_destroy();
> + rc = ncs_agents_shutdown(); /* Always returns NCSCC_RC_SUCCESS
> */
> + lga_use_count = 0;
> + TRACE("%s: Forced shutdown. Handles
> invalidated\n",__FUNCTION__);
> + }
> + pthread_mutex_unlock(&lga_lock);
> + TRACE_LEAVE();
> + return rc;
> +}
> +
>
> /****************************************************************************
> * Name : lga_msg_destroy
> *
> @@ -387,6 +441,8 @@ void lga_hdl_list_del(lga_client_hdl_rec
> {
> lga_client_hdl_rec_t *client_hdl;
>
> + TRACE_ENTER();
> +
> while ((client_hdl = *p_client_hdl) != NULL) {
> *p_client_hdl = client_hdl->next;
> ncshm_destroy_hdl(NCS_SERVICE_ID_LGA, client_hdl->local_hdl);
> @@ -398,6 +454,7 @@ void lga_hdl_list_del(lga_client_hdl_rec
> free(client_hdl);
> client_hdl = 0;
> }
> + TRACE_LEAVE();
> }
>
>
> /****************************************************************************
> @@ -416,6 +473,7 @@ void lga_hdl_list_del(lga_client_hdl_rec
>
> ******************************************************************************/
> uint32_t lga_log_stream_hdl_rec_del(lga_log_stream_hdl_rec_t
> **list_head, lga_log_stream_hdl_rec_t *rm_node)
> {
> + TRACE_ENTER();
> /* Find the channel hdl record in the list of records */
> lga_log_stream_hdl_rec_t *list_iter = *list_head;
>
> @@ -427,6 +485,7 @@ uint32_t lga_log_stream_hdl_rec_del(lga_
> ncshm_give_hdl(rm_node->log_stream_hdl);
> ncshm_destroy_hdl(NCS_SERVICE_ID_LGA, rm_node->log_stream_hdl);
> free(rm_node);
> + TRACE_LEAVE();
> return NCSCC_RC_SUCCESS;
> } else { /* find the rec */
>
> @@ -438,6 +497,7 @@ uint32_t lga_log_stream_hdl_rec_del(lga_
> ncshm_give_hdl(rm_node->log_stream_hdl);
> ncshm_destroy_hdl(NCS_SERVICE_ID_LGA,
> rm_node->log_stream_hdl);
> free(rm_node);
> + TRACE_LEAVE();
> return NCSCC_RC_SUCCESS;
> }
> /* move onto the next one */
> @@ -493,7 +553,6 @@ uint32_t lga_hdl_rec_del(lga_client_hdl_
> rc = NCSCC_RC_SUCCESS;
> goto out;
> } else { /* find the rec */
> -
> while (NULL != list_iter) {
> if (list_iter->next == rm_node) {
> list_iter->next = rm_node->next;
> @@ -517,10 +576,9 @@ uint32_t lga_hdl_rec_del(lga_client_hdl_
> list_iter = list_iter->next;
> }
> }
> - TRACE("failed");
>
> out:
> - TRACE_LEAVE();
> + TRACE_LEAVE2("rc = %d (2 <=> fail)", rc);
> return rc;
> }
>
> @@ -566,6 +624,15 @@ lga_log_stream_hdl_rec_t *lga_log_stream
> rec->log_stream_name.length = logStreamName->length;
> memcpy((void *)rec->log_stream_name.value, (void
> *)logStreamName->value, logStreamName->length);
> rec->log_header_type = log_header_type;
> +
> + /***
> + * Initiate the recovery flag
> + * The setting means that the stream is initialized and that there
> is
> + * no reason to recover. This setting will change if server down is
> + * detected
> + */
> + rec->recovered_flag = true;
> +
> /** Initialize the parent handle **/
> rec->parent_hdl = *hdl_rec;
>
> @@ -616,6 +683,15 @@ lga_client_hdl_rec_t *lga_hdl_rec_add(lg
> **/
> rec->lgs_client_id = client_id;
>
> + /***
> + * Initiate the recovery flags
> + * The setting means that the client is initialized and that there
> is
> + * no reason to recover. This setting will change if server down is
> + * detected
> + */
> + rec->initialized_flag = true;
> + rec->recovered_flag = true;
> +
> /** Initialize and attach the IPC/Priority queue
> **/
> if (m_NCS_IPC_CREATE(&rec->mbx) != NCSCC_RC_SUCCESS) {
------------------------------------------------------------------------------
Site24x7 APM Insight: Get Deep Visibility into Application Performance
APM + Mobile APM + RUM: Monitor 3 App instances at just $35/Month
Monitor end-to-end web transactions and take corrective actions now
Troubleshoot faster and improve end-user experience. Signup Now!
http://pubads.g.doubleclick.net/gampad/clk?id=272487151&iu=/4140
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel