Hi Mathi,

As per Lennart's feedback, when the log agent gets a streamClose request it can
remove the stream from its database right away; there is no need to wait until
the stream is recovered.

I will update this point. Thanks.

Regards, Vu.


>-----Original Message-----
>From: Lennart Lund [mailto:[email protected]]
>Sent: Tuesday, February 23, 2016 8:30 PM
>To: Mathivanan Naickan Palanivelu; Vu Minh Nguyen
>Cc: [email protected]; Anders Widell; Lennart Lund
>Subject: RE: [PATCH 2 of 4] log: add support for cloud resilience feature 
>(agent
>part) [#1179]
>
>Hi Vu
>
>Mathi has a point here. It is actually possible to serve a stream close request
>even when headless.
>If in state LGA_NO_SERVER the stream can be removed from the agent client
>database without sending a message to the (not existing) server.
>In IMM there will exist a stream object and if this was the only client the
>stream object should have been removed by the server. This means that we
>now have a "dead" stream object. However there is a mechanism in the server
>that will clean up such objects after a timeout.
>
>Regards
>Lennart
>
>> -----Original Message-----
>> From: Mathivanan Naickan Palanivelu [mailto:[email protected]]
>> Sent: den 23 februari 2016 13:44
>> To: Vu Minh Nguyen
>> Cc: Lennart Lund; [email protected]; Anders Widell
>> Subject: Re: [PATCH 2 of 4] log: add support for cloud resilience feature
>> (agent part) [#1179]
>>
>> Hi Vu,
>>
>> Ack for patch 2.
>>
>> By the way, what is the idea behind returning TRY_AGAIN for streamClose(),
>> i.e. as below?
>>
>> +       if (lga_state == LGA_NO_SERVER) {
>> +               /* We have no server and cannot write. The client may try 
>> again
>> +                */
>> +               TRACE("%s No server", __FUNCTION__);
>> +               ais_rc = SA_AIS_ERR_TRY_AGAIN;
>> +               goto done_give_hdl_stream;
>> +       }
>> +
>>
>>
>>
>> Mathi.
>>
>> ----- [email protected] wrote:
>>
>> > osaf/libs/agents/saf/lga/Makefile.am |    6 +-
>> >  osaf/libs/agents/saf/lga/lga.h       |   53 +-
>> >  osaf/libs/agents/saf/lga/lga_api.c   |  858
>> > ++++++++++++++++++++++++----------
>> >  osaf/libs/agents/saf/lga/lga_mds.c   |   46 +-
>> >  osaf/libs/agents/saf/lga/lga_state.c |  670
>> > +++++++++++++++++++++++++++
>> >  osaf/libs/agents/saf/lga/lga_state.h |   41 +
>> >  osaf/libs/agents/saf/lga/lga_util.c  |   94 +++-
>> >  7 files changed, 1479 insertions(+), 289 deletions(-)
>> >
>> >
>> > The patch makes the LOG service able to handle the case where both SC
>> > nodes
>> > are down at the same time.
>> > When one or both nodes go up again the log service must be able to
>> > resume its work preferably without actions by the clients.
>> >
>> > A log client should not have to be aware of whether one or both SC nodes
>> > are down.
>> > The only thing that should happen is that TRY AGAIN (and in some
>> > cases TIMEOUT) is returned.
>> > It is the responsibility of the client to decide how to handle this.
>> >
>> > diff --git a/osaf/libs/agents/saf/lga/Makefile.am
>> > b/osaf/libs/agents/saf/lga/Makefile.am
>> > --- a/osaf/libs/agents/saf/lga/Makefile.am
>> > +++ b/osaf/libs/agents/saf/lga/Makefile.am
>> > @@ -20,7 +20,8 @@ include $(top_srcdir)/Makefile.common
>> >  MAINTAINERCLEANFILES = Makefile.in
>> >
>> >  noinst_HEADERS = \
>> > -  lga.h
>> > +  lga.h \
>> > +  lga_state.h
>> >
>> >  noinst_LTLIBRARIES = liblga.la
>> >
>> > @@ -31,6 +32,7 @@ liblga_la_CPPFLAGS = \
>> >  liblga_la_SOURCES = \
>> >    lga_api.c \
>> >    lga_util.c \
>> > -  lga_mds.c
>> > +  lga_mds.c \
>> > +  lga_state.c
>> >
>> >  liblga_la_LDFLAGS = -static
>> > diff --git a/osaf/libs/agents/saf/lga/lga.h
>> > b/osaf/libs/agents/saf/lga/lga.h
>> > --- a/osaf/libs/agents/saf/lga/lga.h
>> > +++ b/osaf/libs/agents/saf/lga/lga.h
>> > @@ -29,6 +29,7 @@
>> >  #include <ncsencdec_pub.h>
>> >  #include <ncs_util.h>
>> >  #include <logtrace.h>
>> > +#include <osaf_time.h>
>> >
>> >  #include "lgsv_msg.h"
>> >  #include "lgsv_defs.h"
>> > @@ -49,6 +50,11 @@ typedef struct lga_log_stream_hdl_rec {
>> >    unsigned int lgs_log_stream_id; /* server reference
>> for this log
>> > stream */
>> >    struct lga_log_stream_hdl_rec *next;    /* next pointer for
>> the list in
>> > lga_client_hdl_rec_t */
>> >    struct lga_client_hdl_rec *parent_hdl;  /* Back Pointer to
>> the client
>> > instantiation */
>> > +  /* This flag is used with recovery handling. It is valid only
>> > +   * after server down has happened (will be initiated when
>> server
>> > down
>> > +   * event occurs). It's not valid in LGA_NORMAL state.
>> > +   */
>> > +  bool recovered_flag;
>> >  } lga_log_stream_hdl_rec_t;
>> >
>> >  /* LGA client record */
>> > @@ -59,8 +65,46 @@ typedef struct lga_client_hdl_rec {
>> >    lga_log_stream_hdl_rec_t *stream_list;  /* List of open
>> streams per
>> > client */
>> >    SYSF_MBX mbx;           /* priority q mbx
>> b/w MDS & Library */
>> >    struct lga_client_hdl_rec *next;        /* next pointer for
>> the list in
>> > lga_cb_t */
>> > +  /* These flags are used with recovery handling. They are valid
>> only
>> > +   * after server down has happened (will be initiated when
>> server
>> > down
>> > +   * event occurs). They are not valid in LGA_NORMAL state
>> > +   */
>> > +  bool initialized_flag;      /* Used with "headless" recovery
>> > handling
>> > +                               * Set when client
>> is initialized. Streams
>> > +                               * may not have
>> been recovered
>> > +                               */
>> > +  bool recovered_flag;        /* Used with "headless" recovery
>> > handling
>> > +                               * Set when client
>> is initialized an all
>> > +                               * streams are
>> recovered
>> > +                               */
>> >  } lga_client_hdl_rec_t;
>> >
>> > +/* States of the server */
>> > +typedef enum {
>> > +  LGS_START,      /* The state before agent is started
>> > +                   */
>> > +  LGS_DOWN,       /* Server is down (headless)
>> > +                   */
>> > +  LGS_NO_ACTIVE,  /* No active server (switch/fail - over)
>> > +                   */
>> > +  LGS_UP          /* Server is up
>> > +                   */
>> > +} lgs_state_t;
>> > +
>> > +/* Agent internal states */
>> > +typedef enum {
>> > +  LGA_NORMAL,     /* Server is up and no recovery is ongoing
>> > +                   */
>> > +  LGA_NO_SERVER,  /* No Server (Server down) state
>> > +                   */
>> > +  LGA_RECOVERY1,  /* Server is up. Recover clients and streams
>> when
>> > +                   * request from client.
>> > +                   * Recovery1 timer is running
>> > +                   */
>> > +  LGA_RECOVERY2   /* Auto recover remaining clients and
>> streams
>> > +                   * After recovery1 timeout
>> > +                   */
>> > +} lga_state_t;
>> >  /*
>> >   * The LGA control block is the master anchor structure for all LGA
>> >   * instantiations within a process.
>> > @@ -70,8 +114,8 @@ typedef struct {
>> >    lga_client_hdl_rec_t *client_list;      /* LGA client
>> handle database */
>> >    MDS_HDL mds_hdl;        /* MDS handle */
>> >    MDS_DEST lgs_mds_dest;  /* LGS absolute/virtual address */
>> > -  int lgs_up;             /* Indicate that MDS subscription
>> > -                           * is complete */
>> > +  lgs_state_t lgs_state;  /* Indicate current server MDS
>> state */
>> > +  lga_state_t lga_state;  /* Indicate current state of the agent */
>> >    /* LGS LGA sync params */
>> >    int lgs_sync_awaited;
>> >    NCS_SEL_OBJ lgs_sync_sel;
>> > @@ -88,8 +132,9 @@ extern uint32_t lga_mds_msg_async_send(l
>> >  extern void lgsv_lga_evt_free(struct lgsv_msg *);
>> >
>> >  /* lga_init.c */
>> > -extern unsigned int lga_startup(void);
>> > -extern unsigned int lga_shutdown(void);
>> > +unsigned int lga_startup(lga_cb_t *cb);
>> > +extern unsigned int lga_shutdown_after_last_client(void);
>> > +extern unsigned int lga_force_shutdown(void);
>> >
>> >  /* lga_hdl.c */
>> >  extern SaAisErrorT lga_hdl_cbk_dispatch(lga_cb_t *,
>> > lga_client_hdl_rec_t *, SaDispatchFlagsT);
>> > diff --git a/osaf/libs/agents/saf/lga/lga_api.c
>> > b/osaf/libs/agents/saf/lga/lga_api.c
>> > --- a/osaf/libs/agents/saf/lga/lga_api.c
>> > +++ b/osaf/libs/agents/saf/lga/lga_api.c
>> > @@ -16,8 +16,11 @@
>> >   */
>> >
>> >  #include <string.h>
>> > +#include <saf_error.h>
>> >  #include "lga.h"
>> >
>> > +#include "lga_state.h"
>> > +
>> >  #define NCS_SAF_MIN_ACCEPT_TIME 10
>> >
>> >  /* Macro to validate the dispatch flags */
>> > @@ -38,6 +41,8 @@
>> >  /* The main controle block */
>> >  lga_cb_t lga_cb = {
>> >    .cb_lock = PTHREAD_MUTEX_INITIALIZER,
>> > +  .lga_state = LGA_NORMAL,
>> > +  .lgs_state = LGS_START
>> >  };
>> >
>> >  static void populate_open_params(lgsv_stream_open_req_t
>> *open_param,
>> > @@ -116,20 +121,16 @@ SaAisErrorT saLogInitialize(SaLogHandleT
>> >  {
>> >    lga_client_hdl_rec_t *lga_hdl_rec;
>> >    lgsv_msg_t i_msg, *o_msg;
>> > -  SaAisErrorT rc;
>> > -  uint32_t client_id;
>> > +  SaAisErrorT ais_rc = SA_AIS_OK;
>> > +  int rc;
>> > +  uint32_t client_id = 0;
>> >
>> >    TRACE_ENTER();
>> >
>> > +  /* Verify parameters (log handle and that version is given) */
>> >    if ((logHandle == NULL) || (version == NULL)) {
>> >            TRACE("version or handle FAILED");
>> > -          rc = SA_AIS_ERR_INVALID_PARAM;
>> > -          goto done;
>> > -  }
>> > -
>> > -  if ((rc = lga_startup()) != NCSCC_RC_SUCCESS) {
>> > -          TRACE("lga_startup FAILED: %u", rc);
>> > -          rc = SA_AIS_ERR_LIBRARY;
>> > +          ais_rc = SA_AIS_ERR_INVALID_PARAM;
>> >            goto done;
>> >    }
>> >
>> > @@ -144,15 +145,59 @@ SaAisErrorT saLogInitialize(SaLogHandleT
>> >            version->releaseCode = LOG_RELEASE_CODE;
>> >            version->majorVersion =
>> LOG_MAJOR_VERSION;
>> >            version->minorVersion =
>> LOG_MINOR_VERSION;
>> > -          lga_shutdown();
>> > -          rc = SA_AIS_ERR_VERSION;
>> > +          lga_shutdown_after_last_client();
>> > +          ais_rc = SA_AIS_ERR_VERSION;
>> >            goto done;
>> >    }
>> >
>> > -  if (!lga_cb.lgs_up) {
>> > -          lga_shutdown();
>> > -          TRACE("LGS server is down");
>> > -          rc = SA_AIS_ERR_TRY_AGAIN;
>> > +  /***
>> > +   * Handle states
>> > +   * Synchronize with mds and recovery thread (mutex)
>> > +   * NOTE: Nothing to handle if recovery state 1
>> > +   */
>> > +  pthread_mutex_lock(&lga_cb.cb_lock);
>> > +  lgs_state_t lgs_state = lga_cb.lgs_state;
>> > +  lga_state_t lga_state = lga_cb.lga_state;
>> > +  pthread_mutex_unlock(&lga_cb.cb_lock);
>> > +
>> > +  if (lgs_state == LGS_NO_ACTIVE) {
>> > +          /* We have a server but it is temporary
>> unavailable. Client may
>> > +           * try again
>> > +           */
>> > +          TRACE("%s LGS no active", __FUNCTION__);
>> > +          ais_rc = SA_AIS_ERR_TRY_AGAIN;
>> > +          goto done;
>> > +  }
>> > +
>> > +  if (lga_state == LGA_NO_SERVER) {
>> > +          /* We have no server and cannot initialize.
>> > +           * The client may try again
>> > +           */
>> > +          TRACE("%s No server", __FUNCTION__);
>> > +          ais_rc = SA_AIS_ERR_TRY_AGAIN;
>> > +          goto done;
>> > +  }
>> > +
>> > +  if (lga_state == LGA_RECOVERY2) {
>> > +          /* Auto recovery is ongoing. We have to wait for
>> it to finish.
>> > +           * The client may try again
>> > +           */
>> > +          TRACE("%s LGA auto recovery ongoing (2)",
>> __FUNCTION__);
>> > +          ais_rc = SA_AIS_ERR_TRY_AGAIN;
>> > +          goto done;
>> > +  }
>> > +
>> > +  /***
>> > +   * Do Initiate. It's ok to initiate in recovery state 1 since we
>> > have a
>> > +   * server and is not conflicting with auto recovery
>> > +   */
>> > +
>> > +  /* Initiate the client in the agent and if first client also start
>> > MDS
>> > +   * etc.
>> > +   */
>> > +  if ((rc = lga_startup(&lga_cb)) != NCSCC_RC_SUCCESS) {
>> > +          TRACE("lga_startup FAILED: %u", rc);
>> > +          ais_rc = SA_AIS_ERR_LIBRARY;
>> >            goto done;
>> >    }
>> >
>> > @@ -165,18 +210,18 @@ SaAisErrorT saLogInitialize(SaLogHandleT
>> >    /* Send a message to LGS to obtain a client_id/server ref id
>> which
>> > is cluster
>> >     * wide unique.
>> >     */
>> > -  rc = lga_mds_msg_sync_send(&lga_cb, &i_msg, &o_msg,
>> > LGS_WAIT_TIME,MDS_SEND_PRIORITY_HIGH);
>> > +  rc = lga_mds_msg_sync_send(&lga_cb, &i_msg, &o_msg,
>> LGS_WAIT_TIME,
>> > MDS_SEND_PRIORITY_HIGH);
>> >    if (rc != NCSCC_RC_SUCCESS) {
>> > -          lga_shutdown();
>> > -          rc = SA_AIS_ERR_TRY_AGAIN;
>> > +          lga_shutdown_after_last_client();
>> > +          ais_rc = SA_AIS_ERR_TRY_AGAIN;
>> >            goto done;
>> >    }
>> >
>> >      /** Make sure the LGS return status was SA_AIS_OK
>> >       **/
>> > -  if (SA_AIS_OK != o_msg->info.api_resp_info.rc) {
>> > -          rc = o_msg->info.api_resp_info.rc;
>> > -          TRACE("LGS return FAILED");
>> > +  ais_rc = o_msg->info.api_resp_info.rc;
>> > +  if (SA_AIS_OK != ais_rc) {
>> > +          TRACE("%s LGS error response %s",
>> __FUNCTION__,
>> > saf_error(ais_rc));
>> >            goto err;
>> >    }
>> >
>> > @@ -189,27 +234,26 @@ SaAisErrorT saLogInitialize(SaLogHandleT
>> >    /* create the hdl record & store the callbacks */
>> >    lga_hdl_rec = lga_hdl_rec_add(&lga_cb, callbacks, client_id);
>> >    if (lga_hdl_rec == NULL) {
>> > -          rc = SA_AIS_ERR_NO_MEMORY;
>> > +          ais_rc = SA_AIS_ERR_NO_MEMORY;
>> >            goto err;
>> >    }
>> >
>> >    /* pass the handle value to the appl */
>> > -  if (SA_AIS_OK == rc)
>> > -          *logHandle = lga_hdl_rec->local_hdl;
>> > +  *logHandle = lga_hdl_rec->local_hdl;
>> >
>> >   err:
>> >    /* free up the response message */
>> >    if (o_msg)
>> >            lga_msg_destroy(o_msg);
>> >
>> > -  if (rc != SA_AIS_OK) {
>> > -          TRACE_2("LGA INIT FAILED\n");
>> > -          lga_shutdown();
>> > +  if (ais_rc != SA_AIS_OK) {
>> > +          TRACE_2("%s LGA INIT FAILED\n",
>> __FUNCTION__);
>> > +          lga_shutdown_after_last_client();
>> >    }
>> >
>> >   done:
>> > -  TRACE_LEAVE();
>> > -  return rc;
>> > +  TRACE_LEAVE2("client_id = %d", client_id);
>> > +  return ais_rc;
>> >  }
>> >
>> >
>> >
>> /**********************************************************
>> *****************
>> > @@ -325,6 +369,61 @@ SaAisErrorT saLogDispatch(SaLogHandleT l
>> >    return rc;
>> >  }
>> >
>> >
>> +/*********************************************************
>> *********************
>> > + * Finalize
>> > + * API function and help functions
>> > +
>> >
>> **********************************************************
>> ********************/
>> > +
>> > +/**
>> > + * Create and send a Finalize message to the server
>> > + *
>> > + * @param hdl_rec
>> > + * @return AIS return code
>> > + */
>> > +static SaAisErrorT send_Finalize_msg(lga_client_hdl_rec_t *hdl_rec)
>> > +{
>> > +  uint32_t mds_rc;
>> > +  lgsv_msg_t msg, *o_msg = NULL;
>> > +  SaAisErrorT ais_rc = SA_AIS_OK;
>> > +
>> > +  TRACE_ENTER();
>> > +
>> > +  memset(&msg, 0, sizeof(lgsv_msg_t));
>> > +  msg.type = LGSV_LGA_API_MSG;
>> > +  msg.info.api_info.type = LGSV_FINALIZE_REQ;
>> > +  msg.info.api_info.param.finalize.client_id =
>> > hdl_rec->lgs_client_id;
>> > +
>> > +  mds_rc = lga_mds_msg_sync_send(
>> > +          &lga_cb, &msg,
>> > +          &o_msg,
>> > +          LGS_WAIT_TIME,
>> > +          MDS_SEND_PRIORITY_MEDIUM
>> > +          );
>> > +  switch (mds_rc) {
>> > +  case NCSCC_RC_SUCCESS:
>> > +          break;
>> > +  case NCSCC_RC_REQ_TIMOUT:
>> > +          ais_rc = SA_AIS_ERR_TIMEOUT;
>> > +          TRACE("lga_mds_msg_sync_send FAILED: %s",
>> saf_error(ais_rc));
>> > +          goto done;
>> > +  default:
>> > +          TRACE("lga_mds_msg_sync_send FAILED: %s",
>> saf_error(ais_rc));
>> > +          ais_rc = SA_AIS_ERR_NO_RESOURCES;
>> > +          goto done;
>> > +  }
>> > +
>> > +  if (o_msg != NULL) {
>> > +          ais_rc = o_msg->info.api_resp_info.rc;
>> > +          lga_msg_destroy(o_msg);
>> > +  } else
>> > +          ais_rc = SA_AIS_ERR_NO_RESOURCES;
>> > +
>> > +  done:
>> > +
>> > +  TRACE_LEAVE();
>> > +  return ais_rc;
>> > +}
>> > +
>> >
>> >
>> /**********************************************************
>> *****************
>> >   * 8.4.4
>> >   *
>> > @@ -350,84 +449,119 @@ SaAisErrorT saLogDispatch(SaLogHandleT l
>> >  SaAisErrorT saLogFinalize(SaLogHandleT logHandle)
>> >  {
>> >    lga_client_hdl_rec_t *hdl_rec;
>> > -  lgsv_msg_t msg, *o_msg = NULL;
>> > -  SaAisErrorT rc = SA_AIS_OK;
>> > -  uint32_t mds_rc;
>> > +  SaAisErrorT ais_rc = SA_AIS_OK;
>> > +  uint32_t rc;
>> >
>> >    TRACE_ENTER();
>> >
>> >    /* retrieve hdl rec */
>> >    hdl_rec = ncshm_take_hdl(NCS_SERVICE_ID_LGA, logHandle);
>> >    if (hdl_rec == NULL) {
>> > -          TRACE("ncshm_take_hdl failed");
>> > -          rc = SA_AIS_ERR_BAD_HANDLE;
>> > +          TRACE("%s ncshm_take_hdl failed",
>> __FUNCTION__);
>> > +          ais_rc = SA_AIS_ERR_BAD_HANDLE;
>> >            goto done;
>> >    }
>> >
>> > -  /* Check Whether LGS is up or not */
>> > -  if (!lga_cb.lgs_up) {
>> > -          TRACE("LGS down");
>> > -          rc = SA_AIS_ERR_TRY_AGAIN;
>> > +  /***
>> > +   * Handle states
>> > +   * Synchronize with mds and recovery thread (mutex)
>> > +   */
>> > +  pthread_mutex_lock(&lga_cb.cb_lock);
>> > +  lgs_state_t lgs_state = lga_cb.lgs_state;
>> > +  lga_state_t lga_state = lga_cb.lga_state;
>> > +  pthread_mutex_unlock(&lga_cb.cb_lock);
>> > +
>> > +  if (lgs_state == LGS_NO_ACTIVE) {
>> > +          /* We have a server but it is temporary
>> unavailable. Client may
>> > +           * try again
>> > +           */
>> > +          TRACE("%s lgs_state = LGS no active",
>> __FUNCTION__);
>> > +          ais_rc = SA_AIS_ERR_TRY_AGAIN;
>> >            goto done_give_hdl;
>> >    }
>> >
>> > -    /** populate & send the finalize message
>> > -     ** and make sure the finalize from the server
>> > -     ** end returned before deleting the local records.
>> > -     **/
>> > -  memset(&msg, 0, sizeof(lgsv_msg_t));
>> > -  msg.type = LGSV_LGA_API_MSG;
>> > -  msg.info.api_info.type = LGSV_FINALIZE_REQ;
>> > -  msg.info.api_info.param.finalize.client_id =
>> > hdl_rec->lgs_client_id;
>> > -
>> > -  mds_rc = lga_mds_msg_sync_send(&lga_cb, &msg, &o_msg,
>> > LGS_WAIT_TIME,MDS_SEND_PRIORITY_MEDIUM);
>> > -  switch (mds_rc) {
>> > -  case NCSCC_RC_SUCCESS:
>> > -          break;
>> > -  case NCSCC_RC_REQ_TIMOUT:
>> > -          rc = SA_AIS_ERR_TIMEOUT;
>> > -          TRACE("lga_mds_msg_sync_send FAILED: %u",
>> rc);
>> > -          goto done_give_hdl;
>> > -  default:
>> > -          TRACE("lga_mds_msg_sync_send FAILED: %u",
>> rc);
>> > -          rc = SA_AIS_ERR_NO_RESOURCES;
>> > +  if (lga_state == LGA_NO_SERVER) {
>> > +          /* We have no server but can still finalize client.
>> > +           * In this situation no message to server is sent
>> > +           */
>> > +          TRACE("%s lga_state = LGS down",
>> __FUNCTION__);
>> > +          ais_rc = SA_AIS_OK;
>> >            goto done_give_hdl;
>> >    }
>> >
>> > -  if (o_msg != NULL) {
>> > -          rc = o_msg->info.api_resp_info.rc;
>> > -          lga_msg_destroy(o_msg);
>> > -  } else
>> > -          rc = SA_AIS_ERR_NO_RESOURCES;
>> > +  if (lga_state == LGA_RECOVERY2) {
>> > +          /* Auto recovery is ongoing. We have to wait for
>> it to finish.
>> > +           * The client may try again
>> > +           */
>> > +          TRACE("%s lga_state = LGA auto recovery
>> ongoing (2)",
>> > __FUNCTION__);
>> > +          ais_rc = SA_AIS_ERR_TRY_AGAIN;
>> > +          goto done_give_hdl;
>> > +  }
>> >
>> > -  if (rc == SA_AIS_OK) {
>> > -          rc = lga_hdl_rec_del(&lga_cb.client_list,
>> hdl_rec);
>> > -          if (rc != NCSCC_RC_SUCCESS)
>> > -                  rc = SA_AIS_ERR_BAD_HANDLE;
>> > +  if (lga_state == LGA_RECOVERY1) {
>> > +          /* We are in recovery state 1. Client may or may
>> not have been
>> > +           * initialized. If initialized a finalize request must
>> be sent
>> > +           * to the server else the client is finalized in the
>> agent only
>> > +           */
>> > +          TRACE("%s lga_state = Recovery state (1)",
>> __FUNCTION__);
>> > +          if (hdl_rec->initialized_flag == false) {
>> > +                  TRACE("\t Client is not
>> initialized");
>> > +                  goto done_give_hdl;
>> > +          }
>> > +          TRACE("\t Client is initialized");
>> > +  }
>> > +
>> > +  /***
>> > +   * Populate & send the finalize message
>> > +   * and make sure the finalize from the server
>> > +   * end returned before deleting the local records.
>> > +   */
>> > +  ais_rc = send_Finalize_msg(hdl_rec);
>> > +
>> > +  if (ais_rc == SA_AIS_OK) {
>> > +          TRACE("%s delete_one_client",
>> __FUNCTION__);
>> > +          (void) delete_one_client(&lga_cb.client_list,
>> hdl_rec);
>> >    }
>> >
>> >   done_give_hdl:
>> >    ncshm_give_hdl(logHandle);
>> >
>> > -  if (rc == SA_AIS_OK) {
>> > -          rc = lga_shutdown();
>> > +  if (ais_rc == SA_AIS_OK) {
>> > +          rc = lga_shutdown_after_last_client();
>> >            if (rc != NCSCC_RC_SUCCESS)
>> >                    TRACE("lga_shutdown ");
>> >    }
>> >
>> >   done:
>> > -  TRACE_LEAVE2("rc = %u", rc);
>> > -  return rc;
>> > +  TRACE_LEAVE2("ais_rc = %s", saf_error(ais_rc));
>> > +  return ais_rc;
>> >  }
>> >
>> > -static SaAisErrorT validate_open_params(SaLogHandleT logHandle,
>> > -                                  const
>> SaNameT *logStreamName,
>> > -                                  const
>> SaLogFileCreateAttributesT_2 *logFileCreateAttributes,
>> > -
>>      SaLogStreamOpenFlagsT logStreamOpenFlags,
>> > -
>>      SaTimeT timeOut, SaLogStreamHandleT *logStreamHandle,
>> uint32_t
>> > *header_type)
>> >
>> +/*********************************************************
>> *********************
>> > + * Open a stream
>> > + * API function and help functions
>> > +
>> >
>> **********************************************************
>> ********************/
>> > +
>> > +/**
>> > + * Check input parameters for opening a stream
>> > + *
>> > + * @param logStreamName
>> > + * @param logFileCreateAttributes
>> > + * @param logStreamOpenFlags
>> > + * @param logStreamHandle
>> > + * @param header_type
>> > + * @return
>> > + */
>> > +static SaAisErrorT validate_open_params(
>> > +  const SaNameT *logStreamName,
>> > +  const SaLogFileCreateAttributesT_2 *logFileCreateAttributes,
>> > +  SaLogStreamOpenFlagsT logStreamOpenFlags,
>> > +  SaLogStreamHandleT *logStreamHandle,
>> > +  uint32_t *header_type
>> > +  )
>> >  {
>> >    size_t len;
>> > -  SaAisErrorT rc = SA_AIS_OK;
>> > +  SaAisErrorT ais_rc = SA_AIS_OK;
>> >
>> >    TRACE_ENTER();
>> >
>> > @@ -435,7 +569,7 @@ static SaAisErrorT validate_open_params(
>> >
>> >    if ((NULL == logStreamName) || (NULL == logStreamHandle)) {
>> >            TRACE("SA_AIS_ERR_INVALID_PARAM => NULL
>> pointer check");
>> > -          rc = SA_AIS_ERR_INVALID_PARAM;
>> > +          ais_rc = SA_AIS_ERR_INVALID_PARAM;
>> >            goto done;
>> >    }
>> >
>> > @@ -450,7 +584,7 @@ static SaAisErrorT validate_open_params(
>> >     */
>> >    if (logStreamName->length >= SA_MAX_NAME_LENGTH) {
>> >            TRACE("SA_AIS_ERR_INVALID_PARAM,
>> logStreamName->length >
>> > SA_MAX_NAME_LENGTH");
>> > -          rc = SA_AIS_ERR_INVALID_PARAM;
>> > +          ais_rc = SA_AIS_ERR_INVALID_PARAM;
>> >            goto done;
>> >    }
>> >
>> > @@ -575,10 +709,11 @@ static SaAisErrorT validate_open_params(
>> >
>> >   done:
>> >    TRACE_LEAVE();
>> > -  return rc;
>> > +  return ais_rc;
>> >  }
>> >
>> >  /**
>> > + * API function for opening a stream
>> >   *
>> >   * @param logHandle
>> >   * @param logStreamName
>> > @@ -599,29 +734,85 @@ SaAisErrorT saLogStreamOpen_2(SaLogHandl
>> >    lga_client_hdl_rec_t *hdl_rec;
>> >    lgsv_msg_t msg, *o_msg = NULL;
>> >    lgsv_stream_open_req_t *open_param;
>> > -  SaAisErrorT rc;
>> > +  SaAisErrorT ais_rc;
>> > +  int rc = 0;
>> > +  uint32_t ncs_rc;
>> >    uint32_t timeout;
>> >    uint32_t log_stream_id;
>> >    uint32_t log_header_type = 0;
>> >
>> >    TRACE_ENTER();
>> >
>> > -  rc = validate_open_params(logHandle, logStreamName,
>> > logFileCreateAttributes,
>> > -
>> logStreamOpenFlags, timeOut, logStreamHandle,
>> > &log_header_type);
>> > -  if (rc != SA_AIS_OK)
>> > +  ais_rc = validate_open_params(logStreamName,
>> > logFileCreateAttributes,
>> > +
>> logStreamOpenFlags, logStreamHandle, &log_header_type);
>> > +  if (ais_rc != SA_AIS_OK)
>> >            goto done;
>> >
>> >    /* retrieve log service hdl rec */
>> >    hdl_rec = ncshm_take_hdl(NCS_SERVICE_ID_LGA, logHandle);
>> >    if (hdl_rec == NULL) {
>> >            TRACE("ncshm_take_hdl failed");
>> > -          rc = SA_AIS_ERR_BAD_HANDLE;
>> > +          ais_rc = SA_AIS_ERR_BAD_HANDLE;
>> >            goto done;
>> >    }
>> >
>> > +  /***
>> > +   * Handle states
>> > +   * Synchronize with mds and recovery thread (mutex)
>> > +   */
>> > +  pthread_mutex_lock(&lga_cb.cb_lock);
>> > +  lgs_state_t lgs_state = lga_cb.lgs_state;
>> > +  lga_state_t lga_state = lga_cb.lga_state;
>> > +  pthread_mutex_unlock(&lga_cb.cb_lock);
>> > +
>> > +  if (lgs_state == LGS_NO_ACTIVE) {
>> > +          /* We have a server but it is temporary
>> unavailable. Client may
>> > +           * try again
>> > +           */
>> > +          TRACE("%s LGS no active", __FUNCTION__);
>> > +          ais_rc = SA_AIS_ERR_TRY_AGAIN;
>> > +          goto done_give_hdl;
>> > +  }
>> > +
>> > +  if (lga_state == LGA_NO_SERVER) {
>> > +          /* We have no server and cannot open a
>> stream.
>> > +           * The client may try again
>> > +           */
>> > +          TRACE("%s No server", __FUNCTION__);
>> > +          ais_rc = SA_AIS_ERR_TRY_AGAIN;
>> > +          goto done_give_hdl;
>> > +  }
>> > +
>> > +  if (lga_state == LGA_RECOVERY2) {
>> > +          /* Auto recovery is ongoing. We have to wait for
>> it to finish.
>> > +           * The client may try again
>> > +           */
>> > +          TRACE("%s LGA auto recovery ongoing (2)",
>> __FUNCTION__);
>> > +          ais_rc = SA_AIS_ERR_TRY_AGAIN;
>> > +          goto done_give_hdl;
>> > +  }
>> > +
>> > +  if (lga_state == LGA_RECOVERY1) {
>> > +          /* We are in recovery 1 state.
>> > +           * Recover client and execute the request
>> > +           */
>> > +          rc = recover_one_client(hdl_rec);
>> > +          if (rc == -1) {
>> > +                  /* Client could not be recovered.
>> Delete client and
>> > +                   * return BAD HANDLE
>> > +                   */
>> > +                  TRACE("%s delete_one_client",
>> __FUNCTION__);
>> > +                  (void)
>> delete_one_client(&lga_cb.client_list, hdl_rec);
>> > +                  ais_rc =
>> SA_AIS_ERR_BAD_HANDLE;
>> > +                  /* Handles are destroyed so we
>> shall not give handles */
>> > +                  goto done;
>> > +          }
>> > +  }
>> > +
>> > +
>> >      /** Populate a sync MDS message to obtain a log stream id and an
>> > -     **  instance open id.
>> > -     **/
>> > +     *  instance open id.
>> > +     */
>> >    memset(&msg, 0, sizeof(lgsv_msg_t));
>> >    msg.type = LGSV_LGA_API_MSG;
>> >    msg.info.api_info.type = LGSV_STREAM_OPEN_REQ;
>> > @@ -641,7 +832,7 @@ SaAisErrorT saLogStreamOpen_2(SaLogHandl
>> >            /* Construct the logFileName */
>> >            open_param->logFileName = (char *)
>> > malloc(strlen(logFileCreateAttributes->logFileName) + 1);
>> >            if (open_param->logFileName == NULL) {
>> > -                  rc = SA_AIS_ERR_NO_MEMORY;
>> > +                  ais_rc =
>> SA_AIS_ERR_NO_MEMORY;
>> >                    goto done_give_hdl;
>> >            }
>> >            strcpy(open_param->logFileName,
>> > logFileCreateAttributes->logFileName);
>> > @@ -653,7 +844,7 @@ SaAisErrorT saLogStreamOpen_2(SaLogHandl
>> >
>> >            open_param->logFilePathName = (char *)
>> malloc(len);
>> >            if (open_param->logFilePathName == NULL) {
>> > -                  rc = SA_AIS_ERR_NO_MEMORY;
>> > +                  ais_rc =
>> SA_AIS_ERR_NO_MEMORY;
>> >                    goto done_give_hdl;
>> >            }
>> >
>> > @@ -668,31 +859,21 @@ SaAisErrorT saLogStreamOpen_2(SaLogHandl
>> >
>> >    if (timeout < NCS_SAF_MIN_ACCEPT_TIME) {
>> >            TRACE("Timeout");
>> > -          rc = SA_AIS_ERR_TIMEOUT;
>> > -          goto done_give_hdl;
>> > -  }
>> > -
>> > -  /* Check whether LGS is up or not */
>> > -  if (!lga_cb.lgs_up) {
>> > -          TRACE("LGS down");
>> > -          rc = SA_AIS_ERR_TRY_AGAIN;
>> > +          ais_rc = SA_AIS_ERR_TIMEOUT;
>> >            goto done_give_hdl;
>> >    }
>> >
>> >    /* Send a sync MDS message to obtain a log stream id */
>> > -  rc = lga_mds_msg_sync_send(&lga_cb, &msg, &o_msg,
>> > timeout,MDS_SEND_PRIORITY_HIGH);
>> > -  if (rc != NCSCC_RC_SUCCESS) {
>> > -          if (o_msg)
>> > -                  lga_msg_destroy(o_msg);
>> > -          rc = SA_AIS_ERR_TRY_AGAIN;
>> > +  ncs_rc = lga_mds_msg_sync_send(&lga_cb, &msg, &o_msg,
>> timeout,
>> > MDS_SEND_PRIORITY_HIGH);
>> > +  if (ncs_rc != NCSCC_RC_SUCCESS) {
>> > +          ais_rc = SA_AIS_ERR_TRY_AGAIN;
>> >            goto done_give_hdl;
>> >    }
>> >
>> > -  if (SA_AIS_OK != o_msg->info.api_resp_info.rc) {
>> > -          rc = o_msg->info.api_resp_info.rc;
>> > -          TRACE("Bad return status!!! rc = %d", rc);
>> > -          if (o_msg)
>> > -                  lga_msg_destroy(o_msg);
>> > +  ais_rc = o_msg->info.api_resp_info.rc;
>> > +  if (SA_AIS_OK != ais_rc) {
>> > +          TRACE("Bad return status!!! rc = %d", ais_rc);
>> > +          lga_msg_destroy(o_msg);
>> >            goto done_give_hdl;
>> >    }
>> >
>> > @@ -708,25 +889,28 @@ SaAisErrorT saLogStreamOpen_2(SaLogHandl
>> >      /** Allocate an LGA_LOG_STREAM_HDL_REC structure and insert this
>> >       *  into the list of channel hdl record.
>> >       **/
>> > -  lstr_hdl_rec = lga_log_stream_hdl_rec_add(&hdl_rec,
>> > -
>>        log_stream_id, logStreamOpenFlags, logStreamName,
>> > log_header_type);
>> > +  lstr_hdl_rec = lga_log_stream_hdl_rec_add(
>> > +          &hdl_rec,
>> > +          log_stream_id, logStreamOpenFlags,
>> > +          logStreamName, log_header_type
>> > +          );
>> >    if (lstr_hdl_rec == NULL) {
>> >            pthread_mutex_unlock(&lga_cb.cb_lock);
>> > -          if (o_msg)
>> > -                  lga_msg_destroy(o_msg);
>> > -          rc = SA_AIS_ERR_NO_MEMORY;
>> > +          lga_msg_destroy(o_msg);
>> > +          ais_rc = SA_AIS_ERR_NO_MEMORY;
>> >            goto done_give_hdl;
>> >    }
>> > +
>> >      /** UnLock LGA_CB
>> >       **/
>> >    pthread_mutex_unlock(&lga_cb.cb_lock);
>> >
>> > -    /** Give the hdl-mgr allocated hdl to the application.
>> > -     **/
>> > +   /** Give the hdl-mgr allocated hdl to the application and free
>> the
>> > response
>> > +    *  message
>> > +      **/
>> >    *logStreamHandle =
>> > (SaLogStreamHandleT)lstr_hdl_rec->log_stream_hdl;
>> >
>> > -  if (o_msg)
>> > -          lga_msg_destroy(o_msg);
>> > +  lga_msg_destroy(o_msg);
>> >
>> >   done_give_hdl:
>> >    ncshm_give_hdl(logHandle);
>> > @@ -735,7 +919,7 @@ SaAisErrorT saLogStreamOpen_2(SaLogHandl
>> >
>> >   done:
>> >    TRACE_LEAVE();
>> > -  return rc;
>> > +  return ais_rc;
>> >  }
>> >
>> >  /**
>> > @@ -758,6 +942,136 @@ SaAisErrorT saLogStreamOpenAsync_2(SaLog
>> >    return SA_AIS_ERR_NOT_SUPPORTED;
>> >  }
>> >
>> >
>> +/*********************************************************
>> *********************
>> > + * Write Log record
>> > + * API function and help functions
>> > +
>> >
>> **********************************************************
>> ********************/
>> > +
>> > +/**
>> > + * Validate the log record and if generic header add
>> > + * logSvcUsrName from environment variable
>> SA_AMF_COMPONENT_NAME
>> > + *
>> > + * @param logRecord[in]
>> > + * @param logSvcUsrName[out]
>> > + * @param write_param[out]
>> > + * @return AIS return code
>> > + */
>> > +static SaAisErrorT handle_log_record(const SaLogRecordT *logRecord,
>> > +  SaNameT *logSvcUsrName,
>> > +  lgsv_write_log_async_req_t *write_param)
>> > +{
>> > +  SaAisErrorT ais_rc = SA_AIS_OK;
>> > +  SaTimeT logTimeStamp;
>> > +
>> > +  TRACE_ENTER();
>> > +
>> > +  if (NULL == logRecord) {
>> > +          TRACE("SA_AIS_ERR_INVALID_PARAM => NULL
>> pointer check");
>> > +          ais_rc = SA_AIS_ERR_INVALID_PARAM;
>> > +          goto done;
>> > +  }
>> > +
>> > +  if (logRecord->logHdrType == SA_LOG_GENERIC_HEADER) {
>> > +          switch (logRecord-
>> >logHeader.genericHdr.logSeverity) {
>> > +          case SA_LOG_SEV_EMERGENCY:
>> > +          case SA_LOG_SEV_ALERT:
>> > +          case SA_LOG_SEV_CRITICAL:
>> > +          case SA_LOG_SEV_ERROR:
>> > +          case SA_LOG_SEV_WARNING:
>> > +          case SA_LOG_SEV_NOTICE:
>> > +          case SA_LOG_SEV_INFO:
>> > +                  break;
>> > +          default:
>> > +                  TRACE("Invalid severity: %x",
>> > +                          logRecord-
>> >logHeader.genericHdr.logSeverity);
>> > +                  ais_rc =
>> SA_AIS_ERR_INVALID_PARAM;
>> > +                  goto done;
>> > +          }
>> > +  }
>> > +
>> > +  if (logRecord->logBuffer != NULL) {
>> > +          if ((logRecord->logBuffer->logBuf == NULL) &&
>> > +                  (logRecord->logBuffer-
>> >logBufSize != 0)) {
>> > +                  TRACE("logBuf == NULL &&
>> logBufSize != 0");
>> > +                  ais_rc =
>> SA_AIS_ERR_INVALID_PARAM;
>> > +                  goto done;
>> > +          }
>> > +  }
>> > +
>> > +  /* Set timeStamp data if not provided by application user */
>> > +  if (logRecord->logTimeStamp == SA_TIME_UNKNOWN) {
>> > +          logTimeStamp = setLogTime();
>> > +          write_param->logTimeStamp = &logTimeStamp;
>> > +  } else {
>> > +          write_param->logTimeStamp = (SaTimeT
>> *)&logRecord->logTimeStamp;
>> > +  }
>> > +
>> > +  /* SA_AIS_ERR_INVALID_PARAM, bullet 2 in SAI-AIS-LOG-
>> A.02.01
>> > +     Section 3.6.3, Return Values */
>> > +  if (logRecord->logHdrType == SA_LOG_GENERIC_HEADER) {
>> > +          if (logRecord-
>> >logHeader.genericHdr.logSvcUsrName == NULL) {
>> > +                  char *logSvcUsrChars = NULL;
>> > +                  TRACE("logSvcUsrName ==
>> NULL");
>> > +                  logSvcUsrChars =
>> getenv("SA_AMF_COMPONENT_NAME");
>> > +                  if (logSvcUsrChars == NULL) {
>> > +                          ais_rc =
>> SA_AIS_ERR_INVALID_PARAM;
>> > +                          goto done;
>> > +                  }
>> > +                  logSvcUsrName->length =
>> strlen(logSvcUsrChars);
>> > +                  if (logSvcUsrName->length >=
>> SA_MAX_NAME_LENGTH) {
>> > +
>>      TRACE("SA_AMF_COMPONENT_NAME is too long");
>> > +                          ais_rc =
>> SA_AIS_ERR_INVALID_PARAM;
>> > +                          goto done;
>> > +                  }
>> > +                  strcpy((char *)logSvcUsrName-
>> >value, logSvcUsrChars);
>> > +                  write_param->logSvcUsrName =
>> logSvcUsrName;
>> > +          } else {
>> > +                  if (logRecord-
>> >logHeader.genericHdr.logSvcUsrName->length >=
>> > +
>>      SA_MAX_NAME_LENGTH) {
>> > +
>>      TRACE("logSvcUsrName too long");
>> > +                          ais_rc =
>> SA_AIS_ERR_INVALID_PARAM;
>> > +                          goto done;
>> > +                  }
>> > +                  logSvcUsrName->length =
>> > +                          logRecord-
>> >logHeader.genericHdr.logSvcUsrName->length;
>> > +                  write_param->logSvcUsrName =
>> > +                          (SaNameT
>> *)logRecord->logHeader.genericHdr.logSvcUsrName;
>> > +          }
>> > +  }
>> > +
>> > +  if (logRecord->logHdrType == SA_LOG_NTF_HEADER) {
>> > +          if (logRecord-
>> >logHeader.ntfHdr.notificationObject == NULL) {
>> > +                  TRACE("notificationObject ==
>> NULL");
>> > +                  ais_rc =
>> SA_AIS_ERR_INVALID_PARAM;
>> > +                  goto done;
>> > +          }
>> > +
>> > +          if (logRecord-
>> >logHeader.ntfHdr.notificationObject->length >=
>> > +                  SA_MAX_NAME_LENGTH) {
>> > +                  TRACE("notificationObject.length
>> >= SA_MAX_NAME_LENGTH");
>> > +                  ais_rc =
>> SA_AIS_ERR_INVALID_PARAM;
>> > +                  goto done;
>> > +          }
>> > +
>> > +          if (logRecord->logHeader.ntfHdr.notifyingObject
>> == NULL) {
>> > +                  TRACE("notifyingObject ==
>> NULL");
>> > +                  ais_rc =
>> SA_AIS_ERR_INVALID_PARAM;
>> > +                  goto done;
>> > +          }
>> > +
>> > +          if (logRecord-
>> >logHeader.ntfHdr.notifyingObject->length >=
>> > +                  SA_MAX_NAME_LENGTH) {
>> > +                  TRACE("notifyingObject.length >=
>> SA_MAX_NAME_LENGTH");
>> > +                  ais_rc =
>> SA_AIS_ERR_INVALID_PARAM;
>> > +                  goto done;
>> > +          }
>> > +  }
>> > +
>> > +done:
>> > +  TRACE_LEAVE();
>> > +  return ais_rc;
>> > +}
>> > +
>> >  /**
>> >   *
>> >   * @param logStreamHandle
>> > @@ -788,158 +1102,125 @@ SaAisErrorT saLogWriteLogAsync(SaLogStre
>> >    lga_log_stream_hdl_rec_t *lstr_hdl_rec;
>> >    lga_client_hdl_rec_t *hdl_rec;
>> >    lgsv_msg_t msg;
>> > -  SaAisErrorT rc = SA_AIS_OK;
>> > +  SaAisErrorT ais_rc = SA_AIS_OK;
>> > +  lgsv_write_log_async_req_t *write_param;
>> >    SaNameT logSvcUsrName;
>> > -  SaTimeT logTimeStamp;
>> > -  lgsv_write_log_async_req_t *write_param;
>> > +  int rc;
>> >
>> >    memset(&(msg), 0, sizeof(lgsv_msg_t));
>> >    write_param = &msg.info.api_info.param.write_log_async;
>> >    TRACE_ENTER();
>> >
>> > -  if (NULL == logRecord) {
>> > -          TRACE("SA_AIS_ERR_INVALID_PARAM => NULL
>> pointer check");
>> > -          rc = SA_AIS_ERR_INVALID_PARAM;
>> > +  if ((ackFlags != 0) && (ackFlags !=
>> SA_LOG_RECORD_WRITE_ACK)) {
>> > +          TRACE("SA_AIS_ERR_BAD_FLAGS=> ackFlags");
>> > +          ais_rc = SA_AIS_ERR_BAD_FLAGS;
>> >            goto done;
>> >    }
>> >
>> > -  if (logRecord->logHdrType == SA_LOG_GENERIC_HEADER) {
>> > -          switch (logRecord-
>> >logHeader.genericHdr.logSeverity) {
>> > -          case SA_LOG_SEV_EMERGENCY:
>> > -          case SA_LOG_SEV_ALERT:
>> > -          case SA_LOG_SEV_CRITICAL:
>> > -          case SA_LOG_SEV_ERROR:
>> > -          case SA_LOG_SEV_WARNING:
>> > -          case SA_LOG_SEV_NOTICE:
>> > -          case SA_LOG_SEV_INFO:
>> > -                  break;
>> > -          default:
>> > -                  TRACE("Invalid severity: %x",
>> > logRecord->logHeader.genericHdr.logSeverity);
>> > -                  rc =
>> SA_AIS_ERR_INVALID_PARAM;
>> > -                  goto done;
>> > -          }
>> > -  }
>> > -
>> > -  if (logRecord->logBuffer != NULL) {
>> > -          if ((logRecord->logBuffer->logBuf == NULL) &&
>> > (logRecord->logBuffer->logBufSize != 0)) {
>> > -                  TRACE("logBuf == NULL &&
>> logBufSize != 0");
>> > -                  rc =
>> SA_AIS_ERR_INVALID_PARAM;
>> > -                  goto done;
>> > -          }
>> > -  }
>> > -
>> > -  if ((ackFlags != 0) && (ackFlags !=
>> SA_LOG_RECORD_WRITE_ACK)) {
>> > -          TRACE("SA_AIS_ERR_BAD_FLAGS=> ackFlags");
>> > -          rc = SA_AIS_ERR_BAD_FLAGS;
>> > +  /* Validate the log record and if generic header add
>> > +   * logSvcUsrName from environment variable
>> SA_AMF_COMPONENT_NAME
>> > +   */
>> > +  ais_rc = handle_log_record(logRecord, &logSvcUsrName,
>> write_param);
>> > +  if (ais_rc != SA_AIS_OK) {
>> > +          TRACE("%s: Validate Log record Fail",
>> __FUNCTION__);
>> >            goto done;
>> >    }
>> >
>> > -  /* Set timeStamp data if not provided by application user */
>> > -  if (logRecord->logTimeStamp == SA_TIME_UNKNOWN) {
>> > -          logTimeStamp = setLogTime();
>> > -          write_param->logTimeStamp = &logTimeStamp;
>> > -  } else {
>> > -          write_param->logTimeStamp = (SaTimeT
>> *)&logRecord->logTimeStamp;
>> > -  }
>> > -
>> > -  /* SA_AIS_ERR_INVALID_PARAM, bullet 2 in SAI-AIS-LOG-
>> A.02.01
>> > -     Section 3.6.3, Return Values */
>> > -  if (logRecord->logHdrType == SA_LOG_GENERIC_HEADER) {
>> > -          if (logRecord-
>> >logHeader.genericHdr.logSvcUsrName == NULL) {
>> > -                  char *logSvcUsrChars = NULL;
>> > -                  TRACE("logSvcUsrName ==
>> NULL");
>> > -                  logSvcUsrChars =
>> getenv("SA_AMF_COMPONENT_NAME");
>> > -                  if (logSvcUsrChars == NULL) {
>> > -                          rc =
>> SA_AIS_ERR_INVALID_PARAM;
>> > -                          goto done;
>> > -                  }
>> > -                  logSvcUsrName.length =
>> strlen(logSvcUsrChars);
>> > -                  if (logSvcUsrName.length >=
>> SA_MAX_NAME_LENGTH) {
>> > -
>>      TRACE("SA_AMF_COMPONENT_NAME is too long");
>> > -                          rc =
>> SA_AIS_ERR_INVALID_PARAM;
>> > -                          goto done;
>> > -                  }
>> > -                  strcpy((char
>> *)logSvcUsrName.value, logSvcUsrChars);
>> > -                  write_param->logSvcUsrName =
>> &logSvcUsrName;
>> > -          } else {
>> > -                  if (logRecord-
>> >logHeader.genericHdr.logSvcUsrName->length >=
>> > SA_MAX_NAME_LENGTH) {
>> > -
>>      TRACE("logSvcUsrName too long");
>> > -                          rc =
>> SA_AIS_ERR_INVALID_PARAM;
>> > -                          goto done;
>> > -                  }
>> > -                  logSvcUsrName.length =
>> > logRecord->logHeader.genericHdr.logSvcUsrName->length;
>> > -                  write_param->logSvcUsrName =
>> (SaNameT
>> > *)logRecord->logHeader.genericHdr.logSvcUsrName;
>> > -          }
>> > -  }
>> > -
>> > -  if (logRecord->logHdrType == SA_LOG_NTF_HEADER) {
>> > -          if (logRecord-
>> >logHeader.ntfHdr.notificationObject == NULL) {
>> > -                  TRACE("notificationObject ==
>> NULL");
>> > -                  rc =
>> SA_AIS_ERR_INVALID_PARAM;
>> > -                  goto done;
>> > -          }
>> > -
>> > -          if (logRecord-
>> >logHeader.ntfHdr.notificationObject->length >=
>> > SA_MAX_NAME_LENGTH) {
>> > -                  TRACE("notificationObject.length
>> >= SA_MAX_NAME_LENGTH");
>> > -                  rc =
>> SA_AIS_ERR_INVALID_PARAM;
>> > -                  goto done;
>> > -          }
>> > -
>> > -          if (logRecord->logHeader.ntfHdr.notifyingObject
>> == NULL) {
>> > -                  TRACE("notifyingObject ==
>> NULL");
>> > -                  rc =
>> SA_AIS_ERR_INVALID_PARAM;
>> > -                  goto done;
>> > -          }
>> > -
>> > -          if (logRecord-
>> >logHeader.ntfHdr.notifyingObject->length >=
>> > SA_MAX_NAME_LENGTH) {
>> > -                  TRACE("notifyingObject.length >=
>> SA_MAX_NAME_LENGTH");
>> > -                  rc =
>> SA_AIS_ERR_INVALID_PARAM;
>> > -                  goto done;
>> > -          }
>> > -  }
>> > -
>> > -  /* retrieve log stream hdl record */
>> > +  /* Retrieve log stream hdl record
>> > +   * From now on we must give stream before return
>> > +   */
>> >    lstr_hdl_rec = ncshm_take_hdl(NCS_SERVICE_ID_LGA,
>> logStreamHandle);
>> >    if (lstr_hdl_rec == NULL) {
>> > -          TRACE("ncshm_take_hdl logStreamHandle
>> FAILED!");
>> > -          rc = SA_AIS_ERR_BAD_HANDLE;
>> > +          TRACE("%s: ncshm_take_hdl logStreamHandle
>> FAILED!",
>> > +                  __FUNCTION__);
>> > +          ais_rc = SA_AIS_ERR_BAD_HANDLE;
>> >            goto done;
>> >    }
>> >
>> >    /* SA_AIS_ERR_INVALID_PARAM, bullet 1 in SAI-AIS-LOG-
>> A.02.01
>> >       Section 3.6.3, Return Values */
>> >    if (lstr_hdl_rec->log_header_type != logRecord->logHdrType)
>> {
>> > -          TRACE("lstr_hdl_rec->log_header_type !=
>> logRecord->logHdrType");
>> > -          ncshm_give_hdl(logStreamHandle);
>> > -          rc = SA_AIS_ERR_INVALID_PARAM;
>> > -          goto done;
>> > +          TRACE("%s: lstr_hdl_rec->log_header_type !=
>> > logRecord->logHdrType",
>> > +                  __FUNCTION__);
>> > +          ais_rc = SA_AIS_ERR_INVALID_PARAM;
>> > +          goto done_give_hdl_stream;
>> >    }
>> >
>> > -  /* retrieve the lga client hdl record */
>> > +  /* retrieve the lga client hdl record
>> > +   * From now on we must give both stream and client (parent)
>> handle
>> > +   * before return
>> > +   */
>> >    hdl_rec = ncshm_take_hdl(NCS_SERVICE_ID_LGA,
>> > lstr_hdl_rec->parent_hdl->local_hdl);
>> >    if (hdl_rec == NULL) {
>> > -          TRACE("ncshm_take_hdl logHandle FAILED!");
>> > -          ncshm_give_hdl(logStreamHandle);
>> > -          rc = SA_AIS_ERR_LIBRARY;
>> > -          goto done;
>> > +          TRACE("%s: ncshm_take_hdl logHandle
>> FAILED!",
>> > +                  __FUNCTION__);
>> > +          ais_rc = SA_AIS_ERR_LIBRARY;
>> > +          goto done_give_hdl_stream;
>> >    }
>> >
>> >    if ((hdl_rec->reg_cbk.saLogWriteLogCallback == NULL) &&
>> (ackFlags ==
>> > SA_LOG_RECORD_WRITE_ACK)) {
>> > -          TRACE("Write Callback not registered");
>> > -          ncshm_give_hdl(logStreamHandle);
>> > -          ncshm_give_hdl(hdl_rec->local_hdl);
>> > -          rc = SA_AIS_ERR_INIT;
>> > -          goto done;
>> > +          TRACE("%s: Write Callback not registered",
>> __FUNCTION__);
>> > +          ais_rc = SA_AIS_ERR_INIT;
>> > +          goto done_give_hdl_all;
>> >    }
>> >
>> > +  /***
>> > +   * Handle states
>> > +   * Synchronize with mds and recovery thread (mutex)
>> > +   */
>> > +  pthread_mutex_lock(&lga_cb.cb_lock);
>> > +  lgs_state_t lgs_state = lga_cb.lgs_state;
>> > +  lga_state_t lga_state = lga_cb.lga_state;
>> > +  pthread_mutex_unlock(&lga_cb.cb_lock);
>> >
>> > -  /* Check Whether LGS is up or not */
>> > -  if (!lga_cb.lgs_up) {
>> > -          ncshm_give_hdl(logStreamHandle);
>> > -          ncshm_give_hdl(hdl_rec->local_hdl);
>> > -          TRACE("LGS down");
>> > -          rc = SA_AIS_ERR_TRY_AGAIN;
>> > -          goto done;
>> > +  if (lgs_state == LGS_NO_ACTIVE) {
>> > +          /* We have a server but it is temporarily
>> unavailable.
>> > +           * Client may try again
>> > +           */
>> > +          TRACE("%s: LGS no active", __FUNCTION__);
>> > +          ais_rc = SA_AIS_ERR_TRY_AGAIN;
>> > +          goto done_give_hdl_all;
>> > +  }
>> > +
>> > +  if (lga_state == LGA_NO_SERVER) {
>> > +          /* We have no server and cannot write. The
>> client may try again
>> > +           */
>> > +          TRACE("\t LGA_NO_SERVER");
>> > +          ais_rc = SA_AIS_ERR_TRY_AGAIN;
>> > +          goto done_give_hdl_all;
>> > +  }
>> > +
>> > +  if (lga_state == LGA_RECOVERY2) {
>> > +          /* Auto recovery is ongoing. We have to wait for
>> it to finish.
>> > +           * The client may try again
>> > +           */
>> > +          TRACE("\t LGA_RECOVERY2");
>> > +          ais_rc = SA_AIS_ERR_TRY_AGAIN;
>> > +          goto done_give_hdl_all;
>> > +  }
>> > +
>> > +  if (lga_state == LGA_RECOVERY1) {
>> > +          /* We are in recovery 1 state.
>> > +           * Recover client and execute the request
>> > +           */
>> > +          TRACE("\t LGA_RECOVERY1");
>> > +          rc = recover_one_client(hdl_rec);
>> > +          if (rc == -1) {
>> > +                  /* Client could not be recovered.
>> Delete client and
>> > +                   * return BAD HANDLE
>> > +                   */
>> > +                  TRACE("\t recover_one_client
>> Fail");
>> > +                  /* The log stream handle is not released in
>> > +                   * delete_one_client() so we have to do it here.
>> > +                   * The function is used in other APIs that do not
>> > +                   * take a log stream handle
>> > +                   */
>> > +
>>      ncshm_give_hdl(logStreamHandle);
>> > +                  (void)
>> delete_one_client(&lga_cb.client_list, hdl_rec);
>> > +                  ais_rc =
>> SA_AIS_ERR_BAD_HANDLE;
>> > +                  /* Handles are destroyed so we
>> shall not give handles */
>> > +                  goto done;
>> > +          }
>> >    }
>> >
>> >      /** populate the mds message to send across to the LGS
>> > @@ -955,18 +1236,20 @@ SaAisErrorT saLogWriteLogAsync(SaLogStre
>> >      /** Send the message out to the LGS
>> >       **/
>> >    if (NCSCC_RC_SUCCESS !=
>> lga_mds_msg_async_send(&lga_cb, &msg,
>> > MDS_SEND_PRIORITY_MEDIUM))
>> > -          rc = SA_AIS_ERR_TRY_AGAIN;
>> > +          ais_rc = SA_AIS_ERR_TRY_AGAIN;
>> >
>> > -    /** Give all the handles that were taken **/
>> > -  ncshm_give_hdl(lstr_hdl_rec->log_stream_hdl);
>> > + done_give_hdl_all:
>> >    ncshm_give_hdl(hdl_rec->local_hdl);
>> > + done_give_hdl_stream:
>> > +  ncshm_give_hdl(logStreamHandle);
>> >
>> > - done:
>> > +done:
>> >    TRACE_LEAVE();
>> > -  return rc;
>> > +  return ais_rc;
>> >  }
>> >
>> >  /**
>> > + * API function for closing stream
>> >   *
>> >   * @param logStreamHandle
>> >   *
>> > @@ -977,34 +1260,85 @@ SaAisErrorT saLogStreamClose(SaLogStream
>> >    lga_log_stream_hdl_rec_t *lstr_hdl_rec;
>> >    lga_client_hdl_rec_t *hdl_rec;
>> >    lgsv_msg_t msg, *o_msg = NULL;
>> > -  SaAisErrorT rc = SA_AIS_OK;
>> > +  SaAisErrorT ais_rc = SA_AIS_OK;
>> >    uint32_t mds_rc;
>> > +  int rc;
>> >
>> >    TRACE_ENTER();
>> >
>> >    lstr_hdl_rec = ncshm_take_hdl(NCS_SERVICE_ID_LGA,
>> logStreamHandle);
>> >    if (lstr_hdl_rec == NULL) {
>> >            TRACE("ncshm_take_hdl logStreamHandle ");
>> > -          rc = SA_AIS_ERR_BAD_HANDLE;
>> > +          ais_rc = SA_AIS_ERR_BAD_HANDLE;
>> >            goto done;
>> >    }
>> >
>> > +  /***
>> > +   * Handle states
>> > +   * Synchronize with mds and recovery thread (mutex)
>> > +   */
>> > +  pthread_mutex_lock(&lga_cb.cb_lock);
>> > +  lgs_state_t lgs_state = lga_cb.lgs_state;
>> > +  lga_state_t lga_state = lga_cb.lga_state;
>> > +  pthread_mutex_unlock(&lga_cb.cb_lock);
>> > +
>> > +  if (lgs_state == LGS_NO_ACTIVE) {
>> > +          /* We have a server but it is temporarily
>> unavailable. Client may
>> > +           * try again
>> > +           */
>> > +          TRACE("%s LGS no active", __FUNCTION__);
>> > +          ais_rc = SA_AIS_ERR_TRY_AGAIN;
>> > +          goto done_give_hdl_stream;
>> > +  }
>> > +
>> > +  if (lga_state == LGA_NO_SERVER) {
>> > +          /* We have no server and cannot write. The
>> client may try again
>> > +           */
>> > +          TRACE("%s No server", __FUNCTION__);
>> > +          ais_rc = SA_AIS_ERR_TRY_AGAIN;
>> > +          goto done_give_hdl_stream;
>> > +  }
>> > +
>> > +  if (lga_state == LGA_RECOVERY2) {
>> > +          /* Auto recovery is ongoing. We have to wait for
>> it to finish.
>> > +           * The client may try again
>> > +           */
>> > +          TRACE("%s LGA auto recovery ongoing (2)",
>> __FUNCTION__);
>> > +          ais_rc = SA_AIS_ERR_TRY_AGAIN;
>> > +          goto done_give_hdl_stream;
>> > +  }
>> > +
>> >    /* retrieve the client hdl record */
>> >    hdl_rec = ncshm_take_hdl(NCS_SERVICE_ID_LGA,
>> > lstr_hdl_rec->parent_hdl->local_hdl);
>> >    if (hdl_rec == NULL) {
>> >            TRACE("ncshm_take_hdl logHandle ");
>> > -          rc = SA_AIS_ERR_LIBRARY;
>> > +          ais_rc = SA_AIS_ERR_LIBRARY;
>> >            goto done_give_hdl_stream;
>> >    }
>> >
>> > -  /* Check Whether LGS is up or not */
>> > -  if (!lga_cb.lgs_up) {
>> > -          TRACE("LGS is down");
>> > -          rc = SA_AIS_ERR_TRY_AGAIN;
>> > -          goto done_give_hdl_all;
>> > +  if (lga_state == LGA_RECOVERY1) {
>> > +          /* We are in recovery 1 state.
>> > +           * Recover client and execute the request
>> > +           */
>> > +          rc = recover_one_client(hdl_rec);
>> > +          if (rc == -1) {
>> > +                  /* Client could not be recovered.
>> Delete client and
>> > +                   * return BAD HANDLE
>> > +                   */
>> > +                  TRACE("%s Recover Fail
>> delete_one_client", __FUNCTION__);
>> > +                  /* The log stream handle is not released in
>> > +                   * delete_one_client() so we have to do it here.
>> > +                   * The function is used in other APIs that do not
>> > +                   * take a log stream handle
>> > +                   */
>> > +
>>      ncshm_give_hdl(logStreamHandle);
>> > +                  (void)
>> delete_one_client(&lga_cb.client_list, hdl_rec);
>> > +                  ais_rc =
>> SA_AIS_ERR_BAD_HANDLE;
>> > +                  /* Handles are destroyed so we
>> shall not give handles */
>> > +                  goto done;
>> > +          }
>> >    }
>> >
>> > -
>> >      /** Populate a MDS message to send to the LGS for a channel
>> >       *  close operation.
>> >       **/
>> > @@ -1018,22 +1352,22 @@ SaAisErrorT saLogStreamClose(SaLogStream
>> >    case NCSCC_RC_SUCCESS:
>> >            break;
>> >    case NCSCC_RC_REQ_TIMOUT:
>> > -          rc = SA_AIS_ERR_TIMEOUT;
>> > -          TRACE("lga_mds_msg_sync_send FAILED: %u",
>> rc);
>> > +          ais_rc = SA_AIS_ERR_TIMEOUT;
>> > +          TRACE("lga_mds_msg_sync_send FAILED: %s",
>> saf_error(ais_rc));
>> >            goto done_give_hdl_all;
>> >    default:
>> > -          TRACE("lga_mds_msg_sync_send FAILED: %u",
>> rc);
>> > -          rc = SA_AIS_ERR_NO_RESOURCES;
>> > +          TRACE("lga_mds_msg_sync_send FAILED: %s",
>> saf_error(ais_rc));
>> > +          ais_rc = SA_AIS_ERR_NO_RESOURCES;
>> >            goto done_give_hdl_all;
>> >    }
>> >
>> >    if (o_msg != NULL) {
>> > -          rc = o_msg->info.api_resp_info.rc;
>> > +          ais_rc = o_msg->info.api_resp_info.rc;
>> >            lga_msg_destroy(o_msg);
>> >    } else
>> > -          rc = SA_AIS_ERR_NO_RESOURCES;
>> > +          ais_rc = SA_AIS_ERR_NO_RESOURCES;
>> >
>> > -  if (rc == SA_AIS_OK) {
>> > +  if (ais_rc == SA_AIS_OK) {
>> >            pthread_mutex_lock(&lga_cb.cb_lock);
>> >
>> >    /** Delete this log stream & the associated resources with this
>> > @@ -1041,7 +1375,7 @@ SaAisErrorT saLogStreamClose(SaLogStream
>> >          **/
>> >            if (NCSCC_RC_SUCCESS !=
>> > lga_log_stream_hdl_rec_del(&hdl_rec->stream_list, lstr_hdl_rec)) {
>> >                    TRACE("Unable to delete log
>> stream");
>> > -                  rc = SA_AIS_ERR_LIBRARY;
>> > +                  ais_rc = SA_AIS_ERR_LIBRARY;
>> >            }
>> >
>> >            pthread_mutex_unlock(&lga_cb.cb_lock);
>> > @@ -1054,7 +1388,7 @@ SaAisErrorT saLogStreamClose(SaLogStream
>> >
>> >   done:
>> >    TRACE_LEAVE();
>> > -  return rc;
>> > +  return ais_rc;
>> >  }
>> >
>> >  /**
>> > diff --git a/osaf/libs/agents/saf/lga/lga_mds.c
>> > b/osaf/libs/agents/saf/lga/lga_mds.c
>> > --- a/osaf/libs/agents/saf/lga/lga_mds.c
>> > +++ b/osaf/libs/agents/saf/lga/lga_mds.c
>> > @@ -17,6 +17,7 @@
>> >
>> >  #include <stdlib.h>
>> >  #include "lga.h"
>> > +#include "lga_state.h"
>> >
>> >  static MDS_CLIENT_MSG_FORMAT_VER
>> >
>> LGA_WRT_LGS_MSG_FMT_ARRAY[LGA_WRT_LGS_SUBPART_VER_RANGE]
>> = {
>> > @@ -478,38 +479,56 @@ static uint32_t lga_lgs_msg_proc(lga_cb_
>> >
>> >
>> **********************************************************
>> ********************/
>> >  static uint32_t lga_mds_svc_evt(struct ncsmds_callback_info
>> > *mds_cb_info)
>> >  {
>> > -  TRACE_2("LGA Rcvd MDS subscribe evt from svc %d \n",
>> > mds_cb_info->info.svc_evt.i_svc_id);
>> > +  TRACE_ENTER();
>> >
>> >    switch (mds_cb_info->info.svc_evt.i_change) {
>> >    case NCSMDS_NO_ACTIVE:
>> > +          TRACE("%s\t NCSMDS_NO_ACTIVE",
>> __FUNCTION__);
>> > +          /* This is a temporary server down e.g. during
>> switch/fail over*/
>> > +          if (mds_cb_info->info.svc_evt.i_svc_id ==
>> NCSMDS_SVC_ID_LGS) {
>> > +
>>      pthread_mutex_lock(&lga_cb.cb_lock);
>> > +                  TRACE("NCSMDS_NO_ACTIVE");
>> > +
>> > +                  memset(&lga_cb.lgs_mds_dest,
>> 0, sizeof(MDS_DEST));
>> > +                  lga_cb.lgs_state =
>> LGS_NO_ACTIVE;
>> > +
>>      pthread_mutex_unlock(&lga_cb.cb_lock);
>> > +          }
>> > +          break;
>> >    case NCSMDS_DOWN:
>> > +          TRACE("%s\t NCSMDS_DOWN",
>> __FUNCTION__);
>> > +          /* This may be a loss of server where all client and stream information
>> > +           * is lost on server side. In this situation client info in agent is
>> > +           * no longer valid and clients must register again (initialize)
>> > +           */
>> >            if (mds_cb_info->info.svc_evt.i_svc_id ==
>> NCSMDS_SVC_ID_LGS) {
>> > -          /** TBD what to do if LGS goes down
>> > -                 ** Hold on to the subscription if possible
>> > -                 ** to send them out if LGS comes back up
>> > -                 **/
>> > +
>>      pthread_mutex_lock(&lga_cb.cb_lock);
>> >                    TRACE("LGS down");
>> > -
>>      pthread_mutex_lock(&lga_cb.cb_lock);
>> >                    memset(&lga_cb.lgs_mds_dest,
>> 0, sizeof(MDS_DEST));
>> > -                  lga_cb.lgs_up = 0;
>> > +                  lga_cb.lgs_state = LGS_DOWN;
>> >
>>      pthread_mutex_unlock(&lga_cb.cb_lock);
>> > +
>> > +                  /* The log server is lost */
>> > +                  lga_no_server_state_set();
>> >            }
>> >            break;
>> >    case NCSMDS_NEW_ACTIVE:
>> >    case NCSMDS_UP:
>> >            switch (mds_cb_info->info.svc_evt.i_svc_id) {
>> >            case NCSMDS_SVC_ID_LGS:
>> > +                  TRACE("%s\t NCSMDS_UP" ,
>> __FUNCTION__);
>> >                /** Store the MDS DEST of the LGS
>> >                       **/
>> > -                  TRACE_2("MSG from LGS
>> NCSMDS_NEW_ACTIVE/UP");
>> >
>>      pthread_mutex_lock(&lga_cb.cb_lock);
>> >                    lga_cb.lgs_mds_dest =
>> mds_cb_info->info.svc_evt.i_dest;
>> > -                  lga_cb.lgs_up = 1;
>> > +                  lga_cb.lgs_state = LGS_UP;
>> >                    if (lga_cb.lgs_sync_awaited) {
>> >                            /* signal waiting
>> thread */
>> >
>>      m_NCS_SEL_OBJ_IND(&lga_cb.lgs_sync_sel);
>> >                    }
>> >
>>      pthread_mutex_unlock(&lga_cb.cb_lock);
>> > +
>> > +                  /* The log server is up */
>> > +                  lga_serv_recov1state_set();
>> >                    break;
>> >            default:
>> >                    break;
>> > @@ -519,6 +538,7 @@ static uint32_t lga_mds_svc_evt(struct n
>> >            break;
>> >    }
>> >
>> > +  TRACE_LEAVE();
>> >    return NCSCC_RC_SUCCESS;
>> >  }
>> >
>> > @@ -1141,8 +1161,9 @@ uint32_t lga_mds_msg_sync_send(lga_cb_t
>> >            /* Retrieve the response and take ownership of
>> the memory  */
>> >            *o_msg = (lgsv_msg_t
>> *)mds_info.info.svc_send.info.sndrsp.o_rsp;
>> >            mds_info.info.svc_send.info.sndrsp.o_rsp =
>> NULL;
>> > -  } else
>> > +  } else {
>> >            TRACE("lga_mds_msg_sync_send FAILED: %u",
>> rc);
>> > +  }
>> >
>> >    TRACE_LEAVE();
>> >    return rc;
>> > @@ -1184,8 +1205,9 @@ uint32_t lga_mds_msg_async_send(lga_cb_t
>> >
>> >    /* send the message */
>> >    rc = ncsmds_api(&mds_info);
>> > -  if (rc != NCSCC_RC_SUCCESS)
>> > -          TRACE("failed");
>> > +  if (rc != NCSCC_RC_SUCCESS) {
>> > +          TRACE("%s: async send failed",
>> __FUNCTION__);
>> > +  }
>> >
>> >    TRACE_LEAVE();
>> >    return rc;
>> > diff --git a/osaf/libs/agents/saf/lga/lga_state.c
>> > b/osaf/libs/agents/saf/lga/lga_state.c
>> > new file mode 100644
>> > --- /dev/null
>> > +++ b/osaf/libs/agents/saf/lga/lga_state.c
>> > @@ -0,0 +1,670 @@
>> > +/*      -*- OpenSAF  -*-
>> > + *
>> > + * (C) Copyright 2016 The OpenSAF Foundation
>> > + *
>> > + * This program is distributed in the hope that it will be useful, but
>> > + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
>> > + * or FITNESS FOR A PARTICULAR PURPOSE. This file and program are licensed
>> > + * under the GNU Lesser General Public License Version 2.1, February 1999.
>> > + * The complete license can be accessed from the following location:
>> > + * http://opensource.org/licenses/lgpl-license.php
>> > + * See the Copying file included with the OpenSAF distribution for full
>> > + * licensing terms.
>> > + *
>> > + * Author(s): Ericsson AB
>> > + *
>> > + */
>> > +
>> > +#include "lga_state.h"
>> > +#include <stdlib.h>
>> > +#include <syslog.h>
>> > +#include <pthread.h>
>> > +#include <osaf_poll.h>
>> > +#include <ncs_osprm.h>
>> > +#include <osaf_time.h>
>> > +#include <saf_error.h>
>> > +
>> > +/***
>> > + * Common data
>> > + */
>> > +
>> > +/* Selection object for terminating recovery thread for state 2
>> > (after timeout)
>> > + * NOTE: Only for recovery2_thread
>> > + */
>> > +static NCS_SEL_OBJ state2_terminate_sel_obj;
>> > +
>> > +static pthread_t recovery2_thread_id = 0;
>> > +static pthread_mutex_t lga_recov_mutex =
>> PTHREAD_MUTEX_INITIALIZER;
>> > +
>> >
>> +/*********************************************************
>> *********************
>> > + * Functions used with server down recovery handling
>> > +
>> >
>> **********************************************************
>> ********************/
>> > +
>> > +static int start_recovery2_thread(void);
>> > +
>> >
>> +/*********************************************************
>> *********************
>> > + * Functions for sending messages to the server used in the recovery
>> > functions
>> > + * Handles TRY AGAIN
>> > +
>> >
>> **********************************************************
>> ********************/
>> > +
>> > +/**
>> > + * Send an initialize message to the server. A number of retries is done if
>> > + * return code is TRY AGAIN
>> > + * The server returns a client id
>> > + *
>> > + * @param client_id[out]
>> > + * @return -1 on error (client id not valid)
>> > + */
>> > +static int send_initialize_msg(uint32_t *client_id)
>> > +{
>> > +  SaVersionT version = {
>> > +          .releaseCode = LOG_RELEASE_CODE,
>> > +          .majorVersion = LOG_MAJOR_VERSION,
>> > +          .minorVersion = LOG_MINOR_VERSION
>> > +  };
>> > +
>> > +  SaAisErrorT ais_rc = SA_AIS_ERR_TRY_AGAIN;
>> > +  uint32_t ncs_rc = NCSCC_RC_SUCCESS;
>> > +  int rc = 0;
>> > +  lgsv_msg_t i_msg, *o_msg = NULL;
>> > +  uint32_t try_again_cnt = 10;
>> > +  const uint32_t sleep_delay_ms = 100;
>> > +
>> > +  TRACE_ENTER();
>> > +
>> > +  /* Populate the message to be sent to the LGS */
>> > +  memset(&i_msg, 0, sizeof(lgsv_msg_t));
>> > +  i_msg.type = LGSV_LGA_API_MSG;
>> > +  i_msg.info.api_info.type = LGSV_INITIALIZE_REQ;
>> > +  i_msg.info.api_info.param.init.version = version;
>> > +
>> > +  /* Send a message to LGS to obtain a client_id
>> > +   */
>> > +  while(try_again_cnt > 0) {
>> > +          ncs_rc = lga_mds_msg_sync_send(&lga_cb,
>> &i_msg, &o_msg,
>> > +
>>      LGS_WAIT_TIME,MDS_SEND_PRIORITY_HIGH);
>> > +          if (ncs_rc != NCSCC_RC_SUCCESS) {
>> > +                  LOG_NO("%s
>> lga_mds_msg_sync_send() Fail %d",
>> > +                          __FUNCTION__,
>> ncs_rc);
>> > +                  rc = -1;
>> > +                  goto done;
>> > +          }
>> > +
>> > +          ais_rc = o_msg->info.api_resp_info.rc;
>> > +          if (ais_rc == SA_AIS_ERR_TRY_AGAIN) {
>> > +                  usleep(sleep_delay_ms * 1000);
>> > +          } else {
>> > +                  break;
>> > +          }
>> > +          try_again_cnt--;
>> > +  }
>> > +  if (SA_AIS_OK != ais_rc) {
>> > +          TRACE("%s LGS error response %s",
>> __FUNCTION__,
>> > saf_error(ais_rc));
>> > +          rc = -1;
>> > +          goto free_done;
>> > +  }
>> > +
>> > +  /* Initialize succeeded */
>> > +  *client_id = o_msg-
>> >info.api_resp_info.param.init_rsp.client_id;
>> > +
>> > +free_done:
>> > +  /* Free up the response message */
>> > +  lga_msg_destroy(o_msg);
>> > +
>> > +done:
>> > +  TRACE_LEAVE2("rc = %d", rc);
>> > +  return rc;
>> > +}
>> > +
>> > +/**
>> > + * Send an open message to the server. A number of retries is done
>> > if
>> > + * return code is TRY AGAIN
>> > + * The server returns client id and a stream id
>> > + *
>> > + * @param lstream_id[out] Log stream id received from server
>> > + * @param p_stream[in]    Pointer to a stream record
>> > + * @return -1 on error (stream id not valid)
>> > + */
>> > +static int send_stream_open_msg(uint32_t *lstream_id,
>> > +  lga_log_stream_hdl_rec_t *p_stream)
>> > +{
>> > +  SaAisErrorT ais_rc = SA_AIS_OK;
>> > +  uint32_t ncs_rc = NCSCC_RC_SUCCESS;
>> > +  int rc = 0;
>> > +  lgsv_msg_t i_msg, *o_msg;
>> > +  lgsv_stream_open_req_t *open_param;
>> > +  lga_client_hdl_rec_t *p_client = p_stream->parent_hdl;
>> > +  uint32_t try_again_cnt = 10;
>> > +  const uint32_t sleep_delay_ms = 100;
>> > +
>> > +  TRACE_ENTER();
>> > +  TRACE("\t log_stream_name \"%s\", lgs_client_id=%d",
>> > +          p_stream->log_stream_name.value, p_client-
>> >lgs_client_id);
>> > +
>> > +  /* Populate a stream open message to the LGS
>> > +   */
>> > +  memset(&i_msg, 0, sizeof(lgsv_msg_t));
>> > +  open_param = &i_msg.info.api_info.param.lstr_open_sync;
>> > +
>> > +  /* Set the open parameters to open a stream for recovery */
>> > +  open_param->client_id = p_client->lgs_client_id;
>> > +  open_param->lstr_name = p_stream->log_stream_name;
>> > +  open_param->logFileFmt = NULL;
>> > +  open_param->logFileFmtLength = 0;
>> > +  open_param->maxLogFileSize = 0;
>> > +  open_param->maxLogRecordSize = 0;
>> > +  open_param->haProperty = SA_FALSE;
>> > +  open_param->logFileFullAction = 0;
>> > +  open_param->maxFilesRotated = 0;
>> > +  open_param->lstr_open_flags = 0;
>> > +
>> > +  i_msg.type = LGSV_LGA_API_MSG;
>> > +  i_msg.info.api_info.type = LGSV_STREAM_OPEN_REQ;
>> > +
>> > +  /* Send a message to LGS to obtain a stream_id
>> > +   */
>> > +  while(try_again_cnt > 0) {
>> > +          ncs_rc = lga_mds_msg_sync_send(&lga_cb,
>> &i_msg, &o_msg,
>> > +                  LGS_WAIT_TIME,
>> MDS_SEND_PRIORITY_HIGH);
>> > +          if (ncs_rc != NCSCC_RC_SUCCESS) {
>> > +                  rc = -1;
>> > +                  TRACE("%s
>> lga_mds_msg_sync_send() Fail %d", __FUNCTION__,
>> > ncs_rc);
>> > +                  goto done;
>> > +          }
>> > +
>> > +          ais_rc = o_msg->info.api_resp_info.rc;
>> > +          if (ais_rc == SA_AIS_ERR_TRY_AGAIN) {
>> > +                  usleep(sleep_delay_ms * 1000);
>> > +          } else {
>> > +                  break;
>> > +          }
>> > +          try_again_cnt--;
>> > +  }
>> > +  if (SA_AIS_OK != ais_rc) {
>> > +          TRACE("%s LGS error response %s",
>> __FUNCTION__,
>> > saf_error(ais_rc));
>> > +          rc = -1;
>> > +          goto free_done;
>> > +  }
>> > +
>> > +  /* Open succeeded */
>> > +  *lstream_id =
>> > o_msg->info.api_resp_info.param.lstr_open_rsp.lstr_id;
>> > +
>> > +free_done:
>> > +  /* Free up the response message */
>> > +  lga_msg_destroy(o_msg);
>> > +
>> > +done:
>> > +  TRACE_LEAVE2("rc = %d", rc);
>> > +  return rc;
>> > +}
>> > +
>> >
>> +/*********************************************************
>> *********************
>> > + * Recovery functions
>> > +
>> >
>> **********************************************************
>> ********************/
>> > +
>> > +/**
>> > + * Register an existing client with the server
>> > + *  - Send an initialize request to the server
>> > + *  - A new client id is received replacing the old no longer valid
>> > one
>> > + *
>> > + * This function shall only be used in recovery. It is assumed that
>> > + * lga_no_server_state_set() has been called
>> > + *
>> > + * @param p_client[in] Pointer to a client record
>> > + * @return -1 on error
>> > + *          0 client is successfully initialized
>> > + *          1 client was already initialized
>> > + */
>> > +static int initialize_one_client(lga_client_hdl_rec_t *p_client)
>> > +{
>> > +  int rc = 0;
>> > +  uint32_t client_id;
>> > +
>> > +  TRACE_ENTER();
>> > +
>> > +  if (p_client->initialized_flag == true) {
>> > +          /* The client is already initialized */
>> > +          rc = 1;
>> > +          goto done;
>> > +  }
>> > +
>> > +  rc = send_initialize_msg(&client_id);
>> > +  if (rc == -1) {
>> > +          TRACE("%s initialize_msg_send Fail",
>> __FUNCTION__);
>> > +  }
>> > +
>> > +  /* Restore the client Id with the one returned by the LGS and
>> > +   * set the initialized flag
>> > +   */
>> > +  osaf_mutex_lock_ordie(&lga_cb.cb_lock);
>> > +  p_client->lgs_client_id = client_id;
>> > +  p_client->initialized_flag = true;
>> > +  osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
>> > +
>> > +done:
>> > +  TRACE_LEAVE2("rc = %d", rc);
>> > +  return rc;
>> > +}
>> > +
>> > +/**
>> > + * Register a stream that was opened by the client with the server
>> > + *  - Send a stream open request to the server (no parameter list)
>> > + *  - A new stream id is received replacing the old no longer valid
>> > one
>> > + *
>> > + * @param p_stream[in] Pointer to a stream record
>> > + * @return -1 on error
>> > + *          0 The stream was successfully recovered
>> > + *          1 The stream was already recovered
>> > + */
>> > +static int recover_one_stream(lga_log_stream_hdl_rec_t *p_stream)
>> > +{
>> > +  int rc = 0;
>> > +  uint32_t stream_id;
>> > +
>> > +  TRACE_ENTER();
>> > +
>> > +  if (p_stream->recovered_flag == true) {
>> > +          /* The stream is already recovered */
>> > +          rc = 1;
>> > +          goto done;
>> > +  }
>> > +
>> > +  rc = send_stream_open_msg(&stream_id, p_stream);
>> > +  if (rc == -1) {
>> > +          TRACE("%s open_stream_msg_send Fail",
>> __FUNCTION__);
>> > +          goto done;
>> > +  }
>> > +
>> > +  /* Restore the stream Id with the Id returned by the LGS and
>> > +   * set the recovered flag
>> > +   */
>> > +  osaf_mutex_lock_ordie(&lga_cb.cb_lock);
>> > +  p_stream->lgs_log_stream_id = stream_id;
>> > +  p_stream->recovered_flag = true;
>> > +  osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
>> > +
>> > +done:
>> > +  TRACE_LEAVE2("rc = %d", rc);
>> > +  return rc;
>> > +}
>> > +
>> >
>> +/*********************************************************
>> *********************
>> > + * Recovery 2 thread handling functions
>> > +
>> >
>> **********************************************************
>> ********************/
>> > +
>> > +/**
>> > + * Thread for handling recovery step 2
>> > + * Wait for timeout or stop request
>> > + * After timeout, recover all not already recovered clients
>> > + *
>> > + * @param dummy[in] Not used
>> > + * @return NULL
>> > + */
>> > +static void *recovery2_thread(void *dummy)
>> > +{
>> > +  int rc = 0;
>> > +  struct timespec seed_ts;
>> > +  int timeout_ms;
>> > +
>> > +  TRACE_ENTER();
>> > +
>> > +  /* Create a random timeout time in msec within an interval
>> > +   */
>> > +  /* Set seed. Use nanoseconds from clock */
>> > +  osaf_clock_gettime(CLOCK_MONOTONIC, &seed_ts);
>> > +  srandom((unsigned int) seed_ts.tv_nsec);
>> > +  /* Interval 400 - 500 sec */
>> > +  timeout_ms = (int) (random() % 100 + 400) * 1000;
>> > +
>> > +  /* Wait for timeout or a signal to terminate
>> > +   */
>> > +  rc = osaf_poll_one_fd(state2_terminate_sel_obj.rmv_obj,
>> > timeout_ms);
>> > +  if (rc == -1) {
>> > +          TRACE("%s osaf_poll_one_fd Fail %s",
>> __FUNCTION__,
>> > strerror(errno));
>> > +          goto done;
>> > +  }
>> > +
>> > +  if (rc == 0) {
>> > +          /* Timeout; Set recovery state 2 */
>> > +          osaf_mutex_lock_ordie(&lga_cb.cb_lock);
>> > +          lga_cb.lga_state = LGA_RECOVERY2;
>> > +          osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
>> > +          TRACE("%s Poll timeout. Enter LGA_RECOVERY2
>> state", __FUNCTION__);
>> > +  } else {
>> > +          /* Stop signal received */
>> > +          TRACE("%s Stop signal received",
>> __FUNCTION__);
>> > +          goto done;
>> > +  }
>> > +
>> > +  /* Execute recovery 2
>> > +   * Recover one client at a time
>> > +   * If fail to recover remove the client. This means that the
>> client
>> > +   * handle is invalidated
>> > +   * When all clients are recovered or if exit is requested exit the
>> > +   * thread
>> > +   */
>> > +  TRACE("%s Execute recovery 2", __FUNCTION__);
>> > +  /* First client */
>> > +  lga_client_hdl_rec_t *p_client;
>> > +  p_client = lga_cb.client_list;
>> > +
>> > +  while (p_client != NULL) {
>> > +          /* Exit if requested to */
>> > +          rc =
>> osaf_poll_one_fd(state2_terminate_sel_obj.rmv_obj, 0);
>> > +          if (rc > 0) {
>> > +                  /* We have been signaled to exit
>> */
>> > +                  goto done;
>> > +          }
>> > +          /* Recover clients one at a time */
>> > +          rc = recover_one_client(p_client);
>> > +          TRACE("\t Client %d is recovered", p_client-
>> >lgs_client_id);
>> > +          if (rc == -1) {
>> > +                  TRACE("%s recover_one_client
>> Fail Deleting cllient (id %d)",
>> > +                          __FUNCTION__,
>> p_client->lgs_client_id);
>> > +                  /* Fail to recover this client
>> > +                   * Remove (handle invalidated)
>> > +                   */
>> > +
>>      osaf_mutex_lock_ordie(&lga_cb.cb_lock);
>> > +                  (void)
>> delete_one_client(&lga_cb.client_list, p_client);
>> > +
>>      osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
>> > +          }
>> > +
>> > +          /* Next client */
>> > +          p_client = p_client->next;
>> > +  }
>> > +
>> > +  /* All clients are recovered (or removed).
>> > +   * Or recovery is aborted
>> > +   * Change to not recovering state
>> > +   * LGA_NORMAL
>> > +   */
>> > +  osaf_mutex_lock_ordie(&lga_cb.cb_lock);
>> > +  lga_cb.lga_state = LGA_NORMAL;
>> > +  TRACE("\t Setting lga_state = LGA_NORMAL");
>> > +  osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
>> > +
>> > +done:
>> > +  /* Cleanup and Exit thread */
>> > +  (void) ncs_sel_obj_destroy(&state2_terminate_sel_obj);
>> > +
>> > +  TRACE_LEAVE();
>> > +  return NULL;
>> > +}
>> > +
>> > +/**
>> > + * Setup and start the thread for recovery state 2
>> > + *
>> > + * @return -1 on error
>> > + */
>> > +static int start_recovery2_thread(void)
>> > +{
>> > +  int rc = 0;
>> > +  uint32_t ncs_rc = NCSCC_RC_SUCCESS;
>> > +  pthread_attr_t attr;
>> > +
>> > +  TRACE_ENTER();
>> > +
>> > +  pthread_attr_init(&attr);
>> > +  pthread_attr_setdetachstate(&attr,
>> PTHREAD_CREATE_DETACHED);
>> > +
>> > +  /* Create a selection object for signaling the recovery2 thread
>> */
>> > +  ncs_rc = ncs_sel_obj_create(&state2_terminate_sel_obj);
>> > +  if (ncs_rc != NCSCC_RC_SUCCESS) {
>> > +          TRACE("%s ncs_sel_obj_create Fail",
>> __FUNCTION__);
>> > +          rc = -1;
>> > +          goto done;
>> > +  }
>> > +
>> > +  /* Create the thread
>> > +   */
>> > +  if (pthread_create(&recovery2_thread_id, &attr,
>> recovery2_thread,
>> > NULL)      != 0) {
>> > +          /* pthread_create() error handling */
>> > +          TRACE("\t pthread_create FAILED: %s",
>> strerror(errno));
>> > +          rc = -1;
>> > +          (void)
>> ncs_sel_obj_destroy(&state2_terminate_sel_obj);
>> > +          goto done;
>> > +  }
>> > +  pthread_attr_destroy(&attr);
>> > +
>> > +done:
>> > +  TRACE_LEAVE2("rc = %d", rc);
>> > +  return rc;
>> > +}
>> > +
>> > +/**
>> > + * Stops the recovery 2 thread
>> > + * It is safe to call this function also if the thread is not
>> > running
>> > + */
>> > +static void stop_recovery2_thread(void)
>> > +{
>> > +  uint32_t ncs_rc = 0;
>> > +
>> > +  TRACE_ENTER();
>> > +
>> > +  /* Check if the thread is running */
>> > +  osaf_mutex_lock_ordie(&lga_cb.cb_lock);
>> > +  lga_state_t lga_state = lga_cb.lga_state;
>> > +  osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
>> > +
>> > +  if (lga_state == LGA_NORMAL) {
>> > +          /* No thread to stop */
>> > +          TRACE("%s LGA_NORMAL no thread to stop",
>> __FUNCTION__);
>> > +          goto done;
>> > +  }
>> > +
>> > +  /* Signal the thread to stop */
>> > +  ncs_rc = ncs_sel_obj_ind(&state2_terminate_sel_obj);
>> > +  if (ncs_rc != NCSCC_RC_SUCCESS) {
>> > +          TRACE("%s ncs_sel_obj_ind Fail",
>> __FUNCTION__);
>> > +  }
>> > +
>> > +  done:
>> > +  TRACE_LEAVE();
>> > +  return;
>> > +}
>> > +
>> >
>> +/*********************************************************
>> *********************
>> > + * Recovery state handling functions
>> > +
>> >
>> **********************************************************
>> ********************/
>> > +
>> >
>> +/*********************************************************
>> *********************
>> > + * Server Down
>> > + *
>> > + * Initiate recovery handling and set LGA_NO_SERVER state
>> > + * LGA_NO_SERVER: State set in MDS event handler when server down
>> > event
>> > + *                lga_no_server_state_set()
>> > +
>> >
>> **********************************************************
>> ********************/
>> > +
>> > +/**
>> > + * Recovery state LGA_NO_SERVER
>> > + *
>> > + * Setup server down state
>> > + * - Mark all clients and their streams as not recovered
>> > + * - Remove recovery thread if exist
>> > + * - Set LGA_NO_SERVER state
>> > + *
>> > + */
>> > +void lga_no_server_state_set(void)
>> > +{
>> > +  lga_client_hdl_rec_t *p_client = lga_cb.client_list;
>> > +  lga_log_stream_hdl_rec_t *p_stream;
>> > +
>> > +  TRACE_ENTER();
>> > +
>> > +  /* Stop recovery thread for state 2 if exist */
>> > +  stop_recovery2_thread();
>> > +
>> > +  /* Set LGA_NO_SERVER state */
>> > +  osaf_mutex_lock_ordie(&lga_cb.cb_lock);
>> > +  lga_cb.lga_state = LGA_NO_SERVER;
>> > +  TRACE("\t lga_state = LGA_NO_SERVER");
>> > +  osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
>> > +
>> > +  while (p_client != NULL) {
>> > +          /* Set Client flags for all clients */
>> > +          p_client->initialized_flag = false;
>> > +          p_client->recovered_flag = false;
>> > +
>> > +          /* Set stream flags for all streams in every client
>> */
>> > +          p_stream = p_client->stream_list;
>> > +          while (p_stream != NULL) {
>> > +                  p_stream->recovered_flag =
>> false;
>> > +
>> > +                  p_stream = p_stream->next;
>> > +          }
>> > +
>> > +          p_client = p_client->next;
>> > +  }
>> > +
>> > +  TRACE_LEAVE();
>> > +}
>> > +
>> >
>> +/*********************************************************
>> *********************
>> > + * Recovery state 1
>> > + *
>> > + * Recover when needed. This means that when a client requests a service
>> > + * the client and all its open streams are recovered in the server.
>> > + * Handler functions for this can be found in this section.
>> > + * LGA_RECOVERY1: State set in MDS event handler when server up
>> > event
>> > + *                lga_serv_recov1state_set(). Starting recovery
>> > thread
>> > + *
>> > + * LGA_RECOVERY2: State set in recovery thread after timeout
>> > + *                lga_serv_recov2state_set()
>> > + *
>> > + * LGA_NORMAL:    State set when all clients are recovered. This is
>> > done in
>> > + *                recovery thread when done. After setting this state
>> > the
>> > + *                thread will exit
>> > +
>> >
>> **********************************************************
>> ********************/
>> > +
>> > +/**
>> > + * If previous state was LGA_NO_SERVER (headless)
>> > + * Start the recovery2_thread. This will start timeout timer for
>> > recovery 2
>> > + * state.
>> > + * Set state to LGA_RECOVERY1
>> > + */
>> > +void lga_serv_recov1state_set(void)
>> > +{
>> > +  TRACE_ENTER();
>> > +
>> > +  osaf_mutex_lock_ordie(&lga_cb.cb_lock);
>> > +  if (lga_cb.lga_state != LGA_NO_SERVER) {
>> > +          /* We have not been headless. No recovery
>> shall be done */
>> > +          TRACE("%s Previous state was not
>> LGA_NO_SERVER lga_stat = %d",
>> > +                  __FUNCTION__,
>> lga_cb.lga_state);
>> > +          osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
>> > +          goto done;
>> > +  } else {
>> > +          lga_cb.lga_state = LGA_RECOVERY1;
>> > +          TRACE("lga_state = %d (2->RECOVERY1)",
>> > +                  lga_cb.lga_state);
>> > +          osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
>> > +  }
>> > +
>> > +  start_recovery2_thread();
>> > +
>> > +done:
>> > +  TRACE_LEAVE();
>> > +  return;
>> > +}
>> > +
>> > +/**
>> > + * Recover the client and all streams in its stream list
>> > + *  - Send an initialize request to the server
>> > + *  - Send a stream open request with no parameters to the server
>> > + *    for each stream.
>> > + *  - If success mark the stream and client as recovered
>> > + *  - If fail to initiate the client or recover one of its streams
>> > the
>> > + *    client shall be finalized. A finalize request is sent to the
>> > server.
>> > + *    If error return code from server it is ignored. This may happen
>> > e.g. if
>> > + *    the client was never initialized.
>> > + *
>> > + * The reason for finalizing the whole client if only one stream
>> > cannot be
>> > + * recovered is that the client will get BAD_HANDLE when requesting a
>> > service
>> > + * for that stream. This may cause the client to initialize a new
>> > client and
>> > + * the old client will remain as a resource leak.
>> > + *
>> > + * @param p_client[in] Pointer to a client record
>> > + * @return -1 on error
>> > + */
>> > +int recover_one_client(lga_client_hdl_rec_t *p_client)
>> > +{
>> > +  int rc = 0;
>> > +  lga_log_stream_hdl_rec_t *p_stream;
>> > +
>> > +  TRACE_ENTER();
>> > +
>> > +  /* This function may be called both in the recovery thread and in the
>> > +   * client thread. A possible scenario is that the recovery 2 thread
>> > +   * times out and calls this function in recovery mode 2 while a
>> > +   * client recovery started in mode 1 is still ongoing. The recovery 2
>> > +   * thread must wait until the ongoing recovery is done
>> > +   */
>> > +  osaf_mutex_lock_ordie(&lga_recov_mutex);
>> > +  /* We may have been waiting at the mutex while the client was
>> > +   * recovered, so it may already have been recovered.
>> > +   */
>> > +  if (p_client->recovered_flag == true) {
>> > +          /* Client is already recovered */
>> > +          TRACE("\t Already recovered");
>> > +          goto done;
>> > +  }
>> > +
>> > +  rc = initialize_one_client(p_client);
>> > +  if (rc == -1) {
>> > +          TRACE("%s initialize_one_client() Fail client Id
>> %d",
>> > +                  __FUNCTION__, p_client-
>> >lgs_client_id);
>> > +          goto done;
>> > +  }
>> > +
>> > +  /* Recover all streams registered with this client */
>> > +  p_stream = p_client->stream_list;
>> > +  while (p_stream != NULL) {
>> > +          TRACE("\t Recover client=%d, stream=%d",
>> > +                  p_client->lgs_client_id, p_stream-
>> >lgs_log_stream_id);
>> > +          rc = recover_one_stream(p_stream);
>> > +          if (rc == -1) {
>> > +                  TRACE("%s
>> recover_one_stream() Fail "
>> > +                  "client Id %d stream Id %d",
>> __FUNCTION__,
>> > +                          p_client-
>> >lgs_client_id,
>> > +                          p_stream-
>> >lgs_log_stream_id);
>> > +                  goto done;
>> > +          }
>> > +
>> > +          p_stream = p_stream->next;
>> > +  }
>> > +
>> > +  osaf_mutex_lock_ordie(&lga_cb.cb_lock);
>> > +  p_client->recovered_flag = true;
>> > +  osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
>> > +
>> > +done:
>> > +  osaf_mutex_unlock_ordie(&lga_recov_mutex);
>> > +
>> > +  TRACE_LEAVE();
>> > +  return rc;
>> > +}
>> > +
>> > +/**
>> > + * Delete one client
>> > + * Wrapper for the function lga_hdl_rec_del() (lga_util.c)
>> > + * This wrapper adds a control to make sure that the function cannot
>> > + * be used by the recovery 2 thread and the client thread at the same
>> > time
>> > + *
>> > + * @param list_head
>> > + * @param rm_node
>> > + */
>> > +uint32_t delete_one_client(
>> > +  lga_client_hdl_rec_t **list_head,
>> > +  lga_client_hdl_rec_t *rm_node
>> > +  )
>> > +{
>> > +  TRACE_ENTER2();
>> > +  uint32_t ncs_rc;
>> > +
>> > +  osaf_mutex_lock_ordie(&lga_recov_mutex);
>> > +  ncs_rc = lga_hdl_rec_del(list_head, rm_node);
>> > +  osaf_mutex_unlock_ordie(&lga_recov_mutex);
>> > +
>> > +  TRACE_LEAVE();
>> > +  return ncs_rc;
>> > +}
>> > diff --git a/osaf/libs/agents/saf/lga/lga_state.h
>> > b/osaf/libs/agents/saf/lga/lga_state.h
>> > new file mode 100644
>> > --- /dev/null
>> > +++ b/osaf/libs/agents/saf/lga/lga_state.h
>> > @@ -0,0 +1,41 @@
>> > +/*      -*- OpenSAF  -*-
>> > + *
>> > + * (C) Copyright 2016 The OpenSAF Foundation
>> > + *
>> > + * This program is distributed in the hope that it will be useful,
>> > but
>> > + * WITHOUT ANY WARRANTY; without even the implied warranty of
>> > MERCHANTABILITY
>> > + * or FITNESS FOR A PARTICULAR PURPOSE. This file and program are
>> > licensed
>> > + * under the GNU Lesser General Public License Version 2.1, February
>> > 1999.
>> > + * The complete license can be accessed from the following location:
>> > + * http://opensource.org/licenses/lgpl-license.php
>> > + * See the Copying file included with the OpenSAF distribution for
>> > full
>> > + * licensing terms.
>> > + *
>> > + * Author(s): Ericsson AB
>> > + *
>> > + */
>> > +
>> > +#ifndef LGA_STATE_H
>> > +#define   LGA_STATE_H
>> > +
>> > +#ifdef    __cplusplus
>> > +extern "C" {
>> > +#endif
>> > +
>> > +#include "lga.h"
>> > +
>> > +/* Recovery functions */
>> > +void lga_no_server_state_set(void);
>> > +void lga_serv_recov1state_set(void);
>> > +int recover_one_client(lga_client_hdl_rec_t *p_client);
>> > +uint32_t delete_one_client(
>> > +  lga_client_hdl_rec_t **list_head,
>> > +  lga_client_hdl_rec_t *rm_node
>> > +  );
>> > +
>> > +#ifdef    __cplusplus
>> > +}
>> > +#endif
>> > +
>> > +#endif    /* LGA_STATE_H */
>> > +
>> > diff --git a/osaf/libs/agents/saf/lga/lga_util.c
>> > b/osaf/libs/agents/saf/lga/lga_util.c
>> > --- a/osaf/libs/agents/saf/lga/lga_util.c
>> > +++ b/osaf/libs/agents/saf/lga/lga_util.c
>> > @@ -44,7 +44,11 @@ static unsigned int lga_create(void)
>> >    }
>> >
>> >    /* Block and wait for indication from MDS meaning LGS is up */
>> > -
>>      osaf_poll_one_fd(m_GET_FD_FROM_SEL_OBJ(lga_cb.lgs_syn
>> c_sel),
>> > 30000);
>> > +
>> > +  /* #1179 Change timeout from 30 sec (30000) to 10 sec (10000)
>> > +   * 30 sec is probably too long for a synchronous API function
>> > +   */
>> > +
>>      osaf_poll_one_fd(m_GET_FD_FROM_SEL_OBJ(lga_cb.lgs_syn
>> c_sel),
>> > 10000);
>> >
>> >    pthread_mutex_lock(&lga_cb.cb_lock);
>> >    lga_cb.lgs_sync_awaited = 0;
>> > @@ -118,12 +122,16 @@ static bool lga_clear_mbx(NCSCONTEXT arg
>> >  static void lga_log_stream_hdl_rec_list_del(lga_log_stream_hdl_rec_t
>> > **plstr_hdl)
>> >  {
>> >    lga_log_stream_hdl_rec_t *lstr_hdl;
>> > +  TRACE_ENTER();
>> >    while ((lstr_hdl = *plstr_hdl) != NULL) {
>> >            *plstr_hdl = lstr_hdl->next;
>> > +          TRACE("%s stream \"%s\", hdl =
>> %d",__FUNCTION__,
>> > +                  lstr_hdl-
>> >log_stream_name.value, lstr_hdl->log_stream_hdl);
>> >            ncshm_destroy_hdl(NCS_SERVICE_ID_LGA,
>> lstr_hdl->log_stream_hdl);
>> >            free(lstr_hdl);
>> >            lstr_hdl = NULL;
>> >    }
>> > +  TRACE_LEAVE();
>> >  }
>> >
>> >
>> >
>> /**********************************************************
>> ******************
>> > @@ -264,11 +272,13 @@ static uint32_t lga_hdl_cbk_dispatch_blo
>> >  }
>> >
>> >  /**
>> > - *
>> > + * Initiate the agent when first used.
>> > + * Start NCS service
>> > + * Register with MDS
>> >   *
>> >   * @return unsigned int
>> >   */
>> > -unsigned int lga_startup(void)
>> > +unsigned int lga_startup(lga_cb_t *cb)
>> >  {
>> >    unsigned int rc = NCSCC_RC_SUCCESS;
>> >    pthread_mutex_lock(&lga_lock);
>> > @@ -287,8 +297,14 @@ unsigned int lga_startup(void)
>> >            if ((rc = lga_create()) != NCSCC_RC_SUCCESS) {
>> >                    ncs_agents_shutdown();
>> >                    goto done;
>> > -          } else
>> > +          } else {
>> >                    lga_use_count = 1;
>> > +          }
>> > +
>> > +          /* Agent has successfully been started including
>> communication
>> > +           * with server
>> > +           */
>> > +          cb->lga_state = LGA_NORMAL;
>> >    }
>> >
>> >   done:
>> > @@ -299,11 +315,18 @@ unsigned int lga_startup(void)
>> >  }
>> >
>> >  /**
>> > + * If called when only one (the last) client remains for this agent, a
>> > + * complete 'shut down' of the agent is done.
>> > + *  - Erase the clients list. Frees all memory including list of
>> > open
>> > + *    streams.
>> > + *  - Unregister with MDS
>> > + *  - Shut down ncs agents
>> >   *
>> > + * Global lga_use_count Contains number of registered clients
>> >   *
>> > - * @return unsigned int
>> > + * @return unsigned int (always NCSCC_RC_SUCCESS)
>> >   */
>> > -unsigned int lga_shutdown(void)
>> > +unsigned int lga_shutdown_after_last_client(void)
>> >  {
>> >    unsigned int rc = NCSCC_RC_SUCCESS;
>> >
>> > @@ -325,6 +348,37 @@ unsigned int lga_shutdown(void)
>> >    return rc;
>> >  }
>> >
>> > +/**
>> > + * Makes a forced shut down of the agent if there are registered
>> > clients
>> > + * Makes all handles invalid and frees all resources (memory, mds)
>> > + *
>> > + * To be used when there are clients and the log server is down and no
>> > + * other recovery is possible.
>> > + *  - Erase the clients list. Frees all memory including list of
>> > open
>> > + *    streams.
>> > + *  - Unregister with MDS
>> > + *  - Shut down ncs agents
>> > + *  - Set lga_use_count to 0
>> > + *
>> > + * Global lga_use_count [in/out] = 0
>> > + *
>> > + * @return always NCSCC_RC_SUCCESS
>> > + */
>> > +unsigned int lga_force_shutdown(void)
>> > +{
>> > +  unsigned int rc = NCSCC_RC_SUCCESS;
>> > +  TRACE_ENTER();
>> > +  pthread_mutex_lock(&lga_lock);
>> > +  if (lga_use_count > 0) {
>> > +          lga_destroy();
>> > +          rc = ncs_agents_shutdown(); /* Always returns
>> NCSCC_RC_SUCCESS */
>> > +          lga_use_count = 0;
>> > +          TRACE("%s: Forced shutdown. Handles
>> invalidated\n",__FUNCTION__);
>> > +  }
>> > +  pthread_mutex_unlock(&lga_lock);
>> > +  TRACE_LEAVE();
>> > +  return rc;
>> > +}
>> > +
>> >
>> >
>> /**********************************************************
>> ******************
>> >   * Name          : lga_msg_destroy
>> >   *
>> > @@ -387,6 +441,8 @@ void lga_hdl_list_del(lga_client_hdl_rec
>> >  {
>> >    lga_client_hdl_rec_t *client_hdl;
>> >
>> > +  TRACE_ENTER();
>> > +
>> >    while ((client_hdl = *p_client_hdl) != NULL) {
>> >            *p_client_hdl = client_hdl->next;
>> >            ncshm_destroy_hdl(NCS_SERVICE_ID_LGA,
>> client_hdl->local_hdl);
>> > @@ -398,6 +454,7 @@ void lga_hdl_list_del(lga_client_hdl_rec
>> >            free(client_hdl);
>> >            client_hdl = 0;
>> >    }
>> > +  TRACE_LEAVE();
>> >  }
>> >
>> >
>> >
>> /**********************************************************
>> ******************
>> > @@ -416,6 +473,7 @@ void lga_hdl_list_del(lga_client_hdl_rec
>> >
>> >
>> **********************************************************
>> ********************/
>> >  uint32_t lga_log_stream_hdl_rec_del(lga_log_stream_hdl_rec_t
>> > **list_head, lga_log_stream_hdl_rec_t *rm_node)
>> >  {
>> > +  TRACE_ENTER();
>> >    /* Find the channel hdl record in the list of records */
>> >    lga_log_stream_hdl_rec_t *list_iter = *list_head;
>> >
>> > @@ -427,6 +485,7 @@ uint32_t lga_log_stream_hdl_rec_del(lga_
>> >            ncshm_give_hdl(rm_node->log_stream_hdl);
>> >            ncshm_destroy_hdl(NCS_SERVICE_ID_LGA,
>> rm_node->log_stream_hdl);
>> >            free(rm_node);
>> > +          TRACE_LEAVE();
>> >            return NCSCC_RC_SUCCESS;
>> >    } else {                /* find the rec */
>> >
>> > @@ -438,6 +497,7 @@ uint32_t lga_log_stream_hdl_rec_del(lga_
>> >
>>      ncshm_give_hdl(rm_node->log_stream_hdl);
>> >
>>      ncshm_destroy_hdl(NCS_SERVICE_ID_LGA, rm_node-
>> >log_stream_hdl);
>> >                            free(rm_node);
>> > +                          TRACE_LEAVE();
>> >                            return
>> NCSCC_RC_SUCCESS;
>> >                    }
>> >                    /* move onto the next one */
>> > @@ -493,7 +553,6 @@ uint32_t lga_hdl_rec_del(lga_client_hdl_
>> >            rc = NCSCC_RC_SUCCESS;
>> >            goto out;
>> >    } else {                /* find the rec */
>> > -
>> >            while (NULL != list_iter) {
>> >                    if (list_iter->next == rm_node) {
>> >                            list_iter->next =
>> rm_node->next;
>> > @@ -517,10 +576,9 @@ uint32_t lga_hdl_rec_del(lga_client_hdl_
>> >                    list_iter = list_iter->next;
>> >            }
>> >    }
>> > -  TRACE("failed");
>> >
>> >   out:
>> > -  TRACE_LEAVE();
>> > +  TRACE_LEAVE2("rc = %d (2 <=> fail)", rc);
>> >    return rc;
>> >  }
>> >
>> > @@ -566,6 +624,15 @@ lga_log_stream_hdl_rec_t *lga_log_stream
>> >    rec->log_stream_name.length = logStreamName->length;
>> >    memcpy((void *)rec->log_stream_name.value, (void
>> > *)logStreamName->value, logStreamName->length);
>> >    rec->log_header_type = log_header_type;
>> > +
>> > +  /***
>> > +   * Initiate the recovery flag
>> > +   * The setting means that the stream is initialized and that
>> there
>> > is
>> > +   * no reason to recover. This setting will change if server down
>> is
>> > +   * detected
>> > +   */
>> > +  rec->recovered_flag = true;
>> > +
>> >      /** Initialize the parent handle **/
>> >    rec->parent_hdl = *hdl_rec;
>> >
>> > @@ -616,6 +683,15 @@ lga_client_hdl_rec_t *lga_hdl_rec_add(lg
>> >       **/
>> >    rec->lgs_client_id = client_id;
>> >
>> > +  /***
>> > +   * Initiate the recovery flags
>> > +   * The setting means that the client is initialized and that there
>> > is
>> > +   * no reason to recover. This setting will change if server down
>> is
>> > +   * detected
>> > +   */
>> > +  rec->initialized_flag = true;
>> > +  rec->recovered_flag = true;
>> > +
>> >      /** Initialize and attach the IPC/Priority queue
>> >       **/
>> >    if (m_NCS_IPC_CREATE(&rec->mbx) != NCSCC_RC_SUCCESS) {


------------------------------------------------------------------------------
Site24x7 APM Insight: Get Deep Visibility into Application Performance
APM + Mobile APM + RUM: Monitor 3 App instances at just $35/Month
Monitor end-to-end web transactions and take corrective actions now
Troubleshoot faster and improve end-user experience. Signup Now!
http://pubads.g.doubleclick.net/gampad/clk?id=272487151&iu=/4140
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to