Hi Canh Van,

On 5/5/2017 6:37 PM, Canh Van Truong wrote:
> During testing, e.g: use saflogger in loop to send a log record to log service
> during failover, we frequently encounter saLogStreamClose() or saLogFinalize()
> get SA_AIS_ERR_TIMEOUT as active LOG service is shutdown while agent requests
> still remain in mailbox.
>
> The close request has reached the active LOG service, i.e. it has been put in
> the mailbox but not yet picked up for processing, because LOGsv has just
> received the TERM signal; a short time later, it kills itself.
>
> The LOG agent does not get the ACK response for the sync close request, and
> after the 10s timeout expires, MDS returns TIMEOUT to the log agent.
>
> The solution is that, before calling daemon_exit, the server iterates over
> all items in its mailbox and, if an item is an agent request, sends a response
> with a try-again error to the agent before shutting down.

The application interprets the two codes differently. It is true that in both
cases the call has succeeded, but our current understanding is that
SA_AIS_ERR_TRY_AGAIN is to be sent when the service is delayed only very
briefly, on the order of /milliseconds/. If SA_AIS_ERR_TIMEOUT comes back, it
means the destination is not reachable (and we don't have any concept of
"unreachable for a short time"). This means the application should not retry
within /milliseconds/; it should evaluate the event that occurred in the
cluster, and based on that the application may or may not try again. So let
the application handle the two cases differently.

=====================================================================================================
SA_AIS_ERR_TIMEOUT - An implementation-dependent timeout occurred before
the call could complete. It is unspecified whether the call succeeded or 
whether it did
not.
SA_AIS_ERR_TRY_AGAIN - The service cannot be provided at this time. The 
process
may retry later.
=====================================================================================================

-AVM

> ---
>   src/log/logd/lgs_cb.h    |  1 +
>   src/log/logd/lgs_evt.cc  | 25 ++++++++++++++-----------
>   src/log/logd/lgs_main.cc |  8 ++++++++
>   3 files changed, 23 insertions(+), 11 deletions(-)
>
> diff --git a/src/log/logd/lgs_cb.h b/src/log/logd/lgs_cb.h
> index bfd2822cc..b2a29bad9 100644
> --- a/src/log/logd/lgs_cb.h
> +++ b/src/log/logd/lgs_cb.h
> @@ -81,6 +81,7 @@ typedef struct lgs_cb {
>     SaInvocationT
>         amf_invocation_id; /* AMF InvocationID - needed to handle Quiesed 
> state */
>     bool is_quiesced_set;
> +  bool is_terminating_set;  /* Flag for osaflogd are terminating */
>     SaImmOiHandleT immOiHandle; /* IMM OI handle                           */
>     SaSelectionObjectT
>         immSelectionObject; /* Selection Object to wait for IMM events */
> diff --git a/src/log/logd/lgs_evt.cc b/src/log/logd/lgs_evt.cc
> index 98ca5f71c..7a0614b43 100644
> --- a/src/log/logd/lgs_evt.cc
> +++ b/src/log/logd/lgs_evt.cc
> @@ -710,8 +710,8 @@ static uint32_t proc_initialize_msg(lgs_cb_t *cb, 
> lgsv_lgs_evt_t *evt) {
>     TRACE_ENTER2("dest %" PRIx64, evt->fr_dest);
>   
>     // Client should try again when role changes is in transition.
> -  if (cb->is_quiesced_set) {
> -    TRACE("Log service is in quiesced state");
> +  if (cb->is_quiesced_set || cb->is_terminating_set) {
> +    TRACE("Log service is in quiesced/terminating");
>       ais_rc = SA_AIS_ERR_TRY_AGAIN;
>       goto snd_rsp;
>     }
> @@ -774,8 +774,8 @@ static uint32_t proc_finalize_msg(lgs_cb_t *cb, 
> lgsv_lgs_evt_t *evt) {
>     TRACE_ENTER2("client_id %u", client_id);
>   
>     // Client should try again when role changes is in transition.
> -  if (cb->is_quiesced_set) {
> -    TRACE("Log service is in quiesced state");
> +  if (cb->is_quiesced_set || cb->is_terminating_set) {
> +    TRACE("Log service is in quiesced/terminating");
>       ais_rc = SA_AIS_ERR_TRY_AGAIN;
>       goto snd_rsp;
>     }
> @@ -1041,8 +1041,8 @@ static uint32_t proc_stream_open_msg(lgs_cb_t *cb, 
> lgsv_lgs_evt_t *evt) {
>                  open_sync_param->client_id);
>   
>     // Client should try again when role changes is in transition.
> -  if (cb->is_quiesced_set) {
> -    TRACE("Log service is in quiesced state");
> +  if (cb->is_quiesced_set || cb->is_terminating_set) {
> +    TRACE("Log service is in quiesced/terminating");
>       ais_rv = SA_AIS_ERR_TRY_AGAIN;
>       goto snd_rsp;
>     }
> @@ -1199,8 +1199,8 @@ static uint32_t proc_stream_close_msg(lgs_cb_t *cb, 
> lgsv_lgs_evt_t *evt) {
>                  close_param->lstr_id);
>   
>     // Client should try again when role changes is in transition.
> -  if (cb->is_quiesced_set) {
> -    TRACE("Log service is in quiesced state");
> +  if (cb->is_quiesced_set || cb->is_terminating_set) {
> +    TRACE("Log service is in quiesced/terminating");
>       ais_rc = SA_AIS_ERR_TRY_AGAIN;
>       goto snd_rsp;
>     }
> @@ -1299,8 +1299,8 @@ static uint32_t proc_write_log_async_msg(lgs_cb_t *cb, 
> lgsv_lgs_evt_t *evt) {
>                  param->lstr_id, node_name);
>   
>     // Client should try again when role changes is in transition.
> -  if (cb->is_quiesced_set) {
> -    TRACE("Log service is in quiesced state");
> +  if (cb->is_quiesced_set || cb->is_terminating_set) {
> +    TRACE("Log service is in quiesced/terminating");
>       error = SA_AIS_ERR_TRY_AGAIN;
>       goto done;
>     }
> @@ -1547,7 +1547,8 @@ void lgs_process_mbx(SYSF_MBX *mbx) {
>       if (lgs_cb->ha_state == SA_AMF_HA_ACTIVE) {
>         if (msg->evt_type <= LGSV_LGS_EVT_LGA_DOWN) {
>           lgs_lgsv_top_level_evt_dispatch_tbl[msg->evt_type](msg);
> -      } else if (msg->evt_type == LGSV_EVT_QUIESCED_ACK) {
> +      } else if (msg->evt_type == LGSV_EVT_QUIESCED_ACK &&
> +          lgs_cb->is_terminating_set == false) {
>           proc_mds_quiesced_ack_msg(msg);
>         } else if (msg->evt_type == LGSV_EVT_NO_OP) {
>           TRACE("Jolted the main thread so it picks up the new IMM FD");
> @@ -1565,5 +1566,7 @@ void lgs_process_mbx(SYSF_MBX *mbx) {
>       }
>   
>       lgs_evt_destroy(msg);
> +  } else if (lgs_cb->is_terminating_set) {
> +      lgs_cb->is_terminating_set = false;
>     }
>   }
> diff --git a/src/log/logd/lgs_main.cc b/src/log/logd/lgs_main.cc
> index fe2f9a2b8..fe0d31caf 100644
> --- a/src/log/logd/lgs_main.cc
> +++ b/src/log/logd/lgs_main.cc
> @@ -564,6 +564,14 @@ int main(int argc, char *argv[]) {
>       }
>   
>       if (fds[FD_TERM].revents & POLLIN) {
> +      // Process all requests in mailbox
> +      lgs_cb->is_terminating_set = true;
> +      if (fds[FD_MBX].revents & POLLIN) {
> +        while (lgs_cb->is_terminating_set) {
> +          lgs_process_mbx(&lgs_mbx);
> +        }
> +      }
> +
>         daemon_exit();
>       }
>   


------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to