Hi Canh Van,

The update one:

On 5/5/2017 6:37 PM, Canh Van Truong wrote:
> During testing, e.g: use saflogger in loop to send a log record to log service
> during failover, we frequently encounter saLogStreamClose() or saLogFinalize()
> get SA_AIS_ERR_TIMEOUT as active LOG service is shutdown while agent requests
> still remain in mailbox.
>
> The close request has come to active LOG, means it is put to the mailbox but 
> not
> yet pick up out for processing yet as LOGsv is just getting TERM signal, then 
> short
>   time later, it does kill itself.
>
> The LOG agent did not get the ACK response for sync close request, and after 
> 10s expired,
> MDS returns TIMEOUT back to the log agent.
>
> The solution for this is that, before calling daemon_exit, iterate all items 
> in its mailbox,
> if that is agent request, send response with try again error to agent before 
> going to shutdown

Application interprets both differently  , it is true in both cases 
call  is NOT succeeded,our current
understanding is SA_AIS_ERR_TRY_AGAIN is to be send in very shorter 
delay of service some millisecond,
if SA_AIS_ERR_TIMEOUT   comes mean destination is not reachable ( and we 
don't have any concept
of short time non reachable fail-over takes it time) this means 
application should not retry in millisecond, it should
evaluate event that occurred in cluster based not that application may 
try or may not try again so let the application handle
it differently.

=====================================================================================================
SA_AIS_ERR_TIMEOUT - An implementation-dependent timeout occurred before
the call could complete. It is unspecified whether the call succeeded or
whether it did
not.
SA_AIS_ERR_TRY_AGAIN - The service cannot be provided at this time. The
process
may retry later.
=====================================================================================================

-AVM

> ---
>   src/log/logd/lgs_cb.h    |  1 +
>   src/log/logd/lgs_evt.cc  | 25 ++++++++++++++-----------
>   src/log/logd/lgs_main.cc |  8 ++++++++
>   3 files changed, 23 insertions(+), 11 deletions(-)
>
> diff --git a/src/log/logd/lgs_cb.h b/src/log/logd/lgs_cb.h
> index bfd2822cc..b2a29bad9 100644
> --- a/src/log/logd/lgs_cb.h
> +++ b/src/log/logd/lgs_cb.h
> @@ -81,6 +81,7 @@ typedef struct lgs_cb {
>     SaInvocationT
>         amf_invocation_id; /* AMF InvocationID - needed to handle Quiesed 
> state */
>     bool is_quiesced_set;
> +  bool is_terminating_set;  /* Flag for osaflogd are terminating */
>     SaImmOiHandleT immOiHandle; /* IMM OI handle                           */
>     SaSelectionObjectT
>         immSelectionObject; /* Selection Object to wait for IMM events */
> diff --git a/src/log/logd/lgs_evt.cc b/src/log/logd/lgs_evt.cc
> index 98ca5f71c..7a0614b43 100644
> --- a/src/log/logd/lgs_evt.cc
> +++ b/src/log/logd/lgs_evt.cc
> @@ -710,8 +710,8 @@ static uint32_t proc_initialize_msg(lgs_cb_t *cb, 
> lgsv_lgs_evt_t *evt) {
>     TRACE_ENTER2("dest %" PRIx64, evt->fr_dest);
>   
>     // Client should try again when role changes is in transition.
> -  if (cb->is_quiesced_set) {
> -    TRACE("Log service is in quiesced state");
> +  if (cb->is_quiesced_set || cb->is_terminating_set) {
> +    TRACE("Log service is in quiesced/terminating");
>       ais_rc = SA_AIS_ERR_TRY_AGAIN;
>       goto snd_rsp;
>     }
> @@ -774,8 +774,8 @@ static uint32_t proc_finalize_msg(lgs_cb_t *cb, 
> lgsv_lgs_evt_t *evt) {
>     TRACE_ENTER2("client_id %u", client_id);
>   
>     // Client should try again when role changes is in transition.
> -  if (cb->is_quiesced_set) {
> -    TRACE("Log service is in quiesced state");
> +  if (cb->is_quiesced_set || cb->is_terminating_set) {
> +    TRACE("Log service is in quiesced/terminating");
>       ais_rc = SA_AIS_ERR_TRY_AGAIN;
>       goto snd_rsp;
>     }
> @@ -1041,8 +1041,8 @@ static uint32_t proc_stream_open_msg(lgs_cb_t *cb, 
> lgsv_lgs_evt_t *evt) {
>                  open_sync_param->client_id);
>   
>     // Client should try again when role changes is in transition.
> -  if (cb->is_quiesced_set) {
> -    TRACE("Log service is in quiesced state");
> +  if (cb->is_quiesced_set || cb->is_terminating_set) {
> +    TRACE("Log service is in quiesced/terminating");
>       ais_rv = SA_AIS_ERR_TRY_AGAIN;
>       goto snd_rsp;
>     }
> @@ -1199,8 +1199,8 @@ static uint32_t proc_stream_close_msg(lgs_cb_t *cb, 
> lgsv_lgs_evt_t *evt) {
>                  close_param->lstr_id);
>   
>     // Client should try again when role changes is in transition.
> -  if (cb->is_quiesced_set) {
> -    TRACE("Log service is in quiesced state");
> +  if (cb->is_quiesced_set || cb->is_terminating_set) {
> +    TRACE("Log service is in quiesced/terminating");
>       ais_rc = SA_AIS_ERR_TRY_AGAIN;
>       goto snd_rsp;
>     }
> @@ -1299,8 +1299,8 @@ static uint32_t proc_write_log_async_msg(lgs_cb_t *cb, 
> lgsv_lgs_evt_t *evt) {
>                  param->lstr_id, node_name);
>   
>     // Client should try again when role changes is in transition.
> -  if (cb->is_quiesced_set) {
> -    TRACE("Log service is in quiesced state");
> +  if (cb->is_quiesced_set || cb->is_terminating_set) {
> +    TRACE("Log service is in quiesced/terminating");
>       error = SA_AIS_ERR_TRY_AGAIN;
>       goto done;
>     }
> @@ -1547,7 +1547,8 @@ void lgs_process_mbx(SYSF_MBX *mbx) {
>       if (lgs_cb->ha_state == SA_AMF_HA_ACTIVE) {
>         if (msg->evt_type <= LGSV_LGS_EVT_LGA_DOWN) {
>           lgs_lgsv_top_level_evt_dispatch_tbl[msg->evt_type](msg);
> -      } else if (msg->evt_type == LGSV_EVT_QUIESCED_ACK) {
> +      } else if (msg->evt_type == LGSV_EVT_QUIESCED_ACK &&
> +          lgs_cb->is_terminating_set == false) {
>           proc_mds_quiesced_ack_msg(msg);
>         } else if (msg->evt_type == LGSV_EVT_NO_OP) {
>           TRACE("Jolted the main thread so it picks up the new IMM FD");
> @@ -1565,5 +1566,7 @@ void lgs_process_mbx(SYSF_MBX *mbx) {
>       }
>   
>       lgs_evt_destroy(msg);
> +  } else if (lgs_cb->is_terminating_set) {
> +      lgs_cb->is_terminating_set = false;
>     }
>   }
> diff --git a/src/log/logd/lgs_main.cc b/src/log/logd/lgs_main.cc
> index fe2f9a2b8..fe0d31caf 100644
> --- a/src/log/logd/lgs_main.cc
> +++ b/src/log/logd/lgs_main.cc
> @@ -564,6 +564,14 @@ int main(int argc, char *argv[]) {
>       }
>   
>       if (fds[FD_TERM].revents & POLLIN) {
> +      // Process all requests in mailbox
> +      lgs_cb->is_terminating_set = true;
> +      if (fds[FD_MBX].revents & POLLIN) {
> +        while (lgs_cb->is_terminating_set) {
> +          lgs_process_mbx(&lgs_mbx);
> +        }
> +      }
> +
>         daemon_exit();
>       }
>   


------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to