Ack

Thanks
Lennart

> -----Original Message-----
> From: Canh Van Truong
> Sent: den 5 maj 2017 15:08
> To: Lennart Lund <lennart.l...@ericsson.com>; Vu Minh Nguyen
> <vu.m.ngu...@dektech.com.au>; mahesh.va...@oracle.com
> Cc: opensaf-devel@lists.sourceforge.net; Canh Van Truong
> <canh.v.tru...@dektech.com.au>
> Subject: [PATCH 1/1] log: fix agent gets TRY_AGAIN instead TIMEOUT during
> failover [#2411]
> 
> During testing, e.g: use saflogger in loop to send a log record to log service
> during failover, we frequently encounter saLogStreamClose() or
> saLogFinalize()
> get SA_AIS_ERR_TIMEOUT as active LOG service is shutdown while agent
> requests
> still remain in mailbox.
> 
> The close request has come to active LOG, means it is put to the mailbox but
> not
> yet pick up out for processing yet as LOGsv is just getting TERM signal, then
> short
>  time later, it does kill itself.
> 
> The LOG agent did not get the ACK response for sync close request, and after
> 10s expired,
> MDS returns TIMEOUT back to the log agent.
> 
> The solution for this is that, before calling daemon_exit, iterate all items 
> in its
> mailbox,
> if that is agent request, send response with try again error to agent before
> going to shutdown
> ---
>  src/log/logd/lgs_cb.h    |  1 +
>  src/log/logd/lgs_evt.cc  | 25 ++++++++++++++-----------
>  src/log/logd/lgs_main.cc |  8 ++++++++
>  3 files changed, 23 insertions(+), 11 deletions(-)
> 
> diff --git a/src/log/logd/lgs_cb.h b/src/log/logd/lgs_cb.h
> index bfd2822cc..b2a29bad9 100644
> --- a/src/log/logd/lgs_cb.h
> +++ b/src/log/logd/lgs_cb.h
> @@ -81,6 +81,7 @@ typedef struct lgs_cb {
>    SaInvocationT
>        amf_invocation_id; /* AMF InvocationID - needed to handle Quiesed
> state */
>    bool is_quiesced_set;
> +  bool is_terminating_set;  /* Flag for osaflogd are terminating */
>    SaImmOiHandleT immOiHandle; /* IMM OI handle                           */
>    SaSelectionObjectT
>        immSelectionObject; /* Selection Object to wait for IMM events */
> diff --git a/src/log/logd/lgs_evt.cc b/src/log/logd/lgs_evt.cc
> index 98ca5f71c..7a0614b43 100644
> --- a/src/log/logd/lgs_evt.cc
> +++ b/src/log/logd/lgs_evt.cc
> @@ -710,8 +710,8 @@ static uint32_t proc_initialize_msg(lgs_cb_t *cb,
> lgsv_lgs_evt_t *evt) {
>    TRACE_ENTER2("dest %" PRIx64, evt->fr_dest);
> 
>    // Client should try again when role changes is in transition.
> -  if (cb->is_quiesced_set) {
> -    TRACE("Log service is in quiesced state");
> +  if (cb->is_quiesced_set || cb->is_terminating_set) {
> +    TRACE("Log service is in quiesced/terminating");
>      ais_rc = SA_AIS_ERR_TRY_AGAIN;
>      goto snd_rsp;
>    }
> @@ -774,8 +774,8 @@ static uint32_t proc_finalize_msg(lgs_cb_t *cb,
> lgsv_lgs_evt_t *evt) {
>    TRACE_ENTER2("client_id %u", client_id);
> 
>    // Client should try again when role changes is in transition.
> -  if (cb->is_quiesced_set) {
> -    TRACE("Log service is in quiesced state");
> +  if (cb->is_quiesced_set || cb->is_terminating_set) {
> +    TRACE("Log service is in quiesced/terminating");
>      ais_rc = SA_AIS_ERR_TRY_AGAIN;
>      goto snd_rsp;
>    }
> @@ -1041,8 +1041,8 @@ static uint32_t proc_stream_open_msg(lgs_cb_t
> *cb, lgsv_lgs_evt_t *evt) {
>                 open_sync_param->client_id);
> 
>    // Client should try again when role changes is in transition.
> -  if (cb->is_quiesced_set) {
> -    TRACE("Log service is in quiesced state");
> +  if (cb->is_quiesced_set || cb->is_terminating_set) {
> +    TRACE("Log service is in quiesced/terminating");
>      ais_rv = SA_AIS_ERR_TRY_AGAIN;
>      goto snd_rsp;
>    }
> @@ -1199,8 +1199,8 @@ static uint32_t proc_stream_close_msg(lgs_cb_t
> *cb, lgsv_lgs_evt_t *evt) {
>                 close_param->lstr_id);
> 
>    // Client should try again when role changes is in transition.
> -  if (cb->is_quiesced_set) {
> -    TRACE("Log service is in quiesced state");
> +  if (cb->is_quiesced_set || cb->is_terminating_set) {
> +    TRACE("Log service is in quiesced/terminating");
>      ais_rc = SA_AIS_ERR_TRY_AGAIN;
>      goto snd_rsp;
>    }
> @@ -1299,8 +1299,8 @@ static uint32_t proc_write_log_async_msg(lgs_cb_t
> *cb, lgsv_lgs_evt_t *evt) {
>                 param->lstr_id, node_name);
> 
>    // Client should try again when role changes is in transition.
> -  if (cb->is_quiesced_set) {
> -    TRACE("Log service is in quiesced state");
> +  if (cb->is_quiesced_set || cb->is_terminating_set) {
> +    TRACE("Log service is in quiesced/terminating");
>      error = SA_AIS_ERR_TRY_AGAIN;
>      goto done;
>    }
> @@ -1547,7 +1547,8 @@ void lgs_process_mbx(SYSF_MBX *mbx) {
>      if (lgs_cb->ha_state == SA_AMF_HA_ACTIVE) {
>        if (msg->evt_type <= LGSV_LGS_EVT_LGA_DOWN) {
>          lgs_lgsv_top_level_evt_dispatch_tbl[msg->evt_type](msg);
> -      } else if (msg->evt_type == LGSV_EVT_QUIESCED_ACK) {
> +      } else if (msg->evt_type == LGSV_EVT_QUIESCED_ACK &&
> +          lgs_cb->is_terminating_set == false) {
>          proc_mds_quiesced_ack_msg(msg);
>        } else if (msg->evt_type == LGSV_EVT_NO_OP) {
>          TRACE("Jolted the main thread so it picks up the new IMM FD");
> @@ -1565,5 +1566,7 @@ void lgs_process_mbx(SYSF_MBX *mbx) {
>      }
> 
>      lgs_evt_destroy(msg);
> +  } else if (lgs_cb->is_terminating_set) {
> +      lgs_cb->is_terminating_set = false;
>    }
>  }
> diff --git a/src/log/logd/lgs_main.cc b/src/log/logd/lgs_main.cc
> index fe2f9a2b8..fe0d31caf 100644
> --- a/src/log/logd/lgs_main.cc
> +++ b/src/log/logd/lgs_main.cc
> @@ -564,6 +564,14 @@ int main(int argc, char *argv[]) {
>      }
> 
>      if (fds[FD_TERM].revents & POLLIN) {
> +      // Process all requests in mailbox
> +      lgs_cb->is_terminating_set = true;
> +      if (fds[FD_MBX].revents & POLLIN) {
> +        while (lgs_cb->is_terminating_set) {
> +          lgs_process_mbx(&lgs_mbx);
> +        }
> +      }
> +
>        daemon_exit();
>      }
> 
> --
> 2.11.0


------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to