Hi Canh Van,
On 5/5/2017 6:37 PM, Canh Van Truong wrote:
> During testing, e.g: use saflogger in loop to send a log record to log service
> during failover, we frequently encounter saLogStreamClose() or saLogFinalize()
> get SA_AIS_ERR_TIMEOUT as active LOG service is shutdown while agent requests
> still remain in mailbox.
>
> The close request has come to active LOG, means it is put to the mailbox but
> not
> yet pick up out for processing yet as LOGsv is just getting TERM signal, then
> short
> time later, it does kill itself.
>
> The LOG agent did not get the ACK response for sync close request, and after
> 10s expired,
> MDS returns TIMEOUT back to the log agent.
>
> The solution for this is that, before calling daemon_exit, iterate all items
> in its mailbox,
> if that is agent request, send response with try again error to agent before
> going to shutdown
An application interprets the two error codes differently. It is true that in
both cases the call succeeded, but our current understanding is that
SA_AIS_ERR_TRY_AGAIN is to be sent for a very short delay of service, on the
order of /milliseconds/.
If SA_AIS_ERR_TIMEOUT comes, it means the destination is not reachable (and we
don't have any concept of short-time non-reachability).
This means the application should not retry within /milliseconds/; it should
evaluate the event that occurred in the cluster, and based on that the
application may or may not try again. So let the application handle the two
cases differently.
=====================================================================================================
SA_AIS_ERR_TIMEOUT - An implementation-dependent timeout occurred before
the call could complete. It is unspecified whether the call succeeded or
whether it did not.
SA_AIS_ERR_TRY_AGAIN - The service cannot be provided at this time. The
process may retry later.
=====================================================================================================
-AVM
> ---
> src/log/logd/lgs_cb.h | 1 +
> src/log/logd/lgs_evt.cc | 25 ++++++++++++++-----------
> src/log/logd/lgs_main.cc | 8 ++++++++
> 3 files changed, 23 insertions(+), 11 deletions(-)
>
> diff --git a/src/log/logd/lgs_cb.h b/src/log/logd/lgs_cb.h
> index bfd2822cc..b2a29bad9 100644
> --- a/src/log/logd/lgs_cb.h
> +++ b/src/log/logd/lgs_cb.h
> @@ -81,6 +81,7 @@ typedef struct lgs_cb {
> SaInvocationT
> amf_invocation_id; /* AMF InvocationID - needed to handle Quiesed
> state */
> bool is_quiesced_set;
> + bool is_terminating_set; /* Flag for osaflogd are terminating */
> SaImmOiHandleT immOiHandle; /* IMM OI handle */
> SaSelectionObjectT
> immSelectionObject; /* Selection Object to wait for IMM events */
> diff --git a/src/log/logd/lgs_evt.cc b/src/log/logd/lgs_evt.cc
> index 98ca5f71c..7a0614b43 100644
> --- a/src/log/logd/lgs_evt.cc
> +++ b/src/log/logd/lgs_evt.cc
> @@ -710,8 +710,8 @@ static uint32_t proc_initialize_msg(lgs_cb_t *cb,
> lgsv_lgs_evt_t *evt) {
> TRACE_ENTER2("dest %" PRIx64, evt->fr_dest);
>
> // Client should try again when role changes is in transition.
> - if (cb->is_quiesced_set) {
> - TRACE("Log service is in quiesced state");
> + if (cb->is_quiesced_set || cb->is_terminating_set) {
> + TRACE("Log service is in quiesced/terminating");
> ais_rc = SA_AIS_ERR_TRY_AGAIN;
> goto snd_rsp;
> }
> @@ -774,8 +774,8 @@ static uint32_t proc_finalize_msg(lgs_cb_t *cb,
> lgsv_lgs_evt_t *evt) {
> TRACE_ENTER2("client_id %u", client_id);
>
> // Client should try again when role changes is in transition.
> - if (cb->is_quiesced_set) {
> - TRACE("Log service is in quiesced state");
> + if (cb->is_quiesced_set || cb->is_terminating_set) {
> + TRACE("Log service is in quiesced/terminating");
> ais_rc = SA_AIS_ERR_TRY_AGAIN;
> goto snd_rsp;
> }
> @@ -1041,8 +1041,8 @@ static uint32_t proc_stream_open_msg(lgs_cb_t *cb,
> lgsv_lgs_evt_t *evt) {
> open_sync_param->client_id);
>
> // Client should try again when role changes is in transition.
> - if (cb->is_quiesced_set) {
> - TRACE("Log service is in quiesced state");
> + if (cb->is_quiesced_set || cb->is_terminating_set) {
> + TRACE("Log service is in quiesced/terminating");
> ais_rv = SA_AIS_ERR_TRY_AGAIN;
> goto snd_rsp;
> }
> @@ -1199,8 +1199,8 @@ static uint32_t proc_stream_close_msg(lgs_cb_t *cb,
> lgsv_lgs_evt_t *evt) {
> close_param->lstr_id);
>
> // Client should try again when role changes is in transition.
> - if (cb->is_quiesced_set) {
> - TRACE("Log service is in quiesced state");
> + if (cb->is_quiesced_set || cb->is_terminating_set) {
> + TRACE("Log service is in quiesced/terminating");
> ais_rc = SA_AIS_ERR_TRY_AGAIN;
> goto snd_rsp;
> }
> @@ -1299,8 +1299,8 @@ static uint32_t proc_write_log_async_msg(lgs_cb_t *cb,
> lgsv_lgs_evt_t *evt) {
> param->lstr_id, node_name);
>
> // Client should try again when role changes is in transition.
> - if (cb->is_quiesced_set) {
> - TRACE("Log service is in quiesced state");
> + if (cb->is_quiesced_set || cb->is_terminating_set) {
> + TRACE("Log service is in quiesced/terminating");
> error = SA_AIS_ERR_TRY_AGAIN;
> goto done;
> }
> @@ -1547,7 +1547,8 @@ void lgs_process_mbx(SYSF_MBX *mbx) {
> if (lgs_cb->ha_state == SA_AMF_HA_ACTIVE) {
> if (msg->evt_type <= LGSV_LGS_EVT_LGA_DOWN) {
> lgs_lgsv_top_level_evt_dispatch_tbl[msg->evt_type](msg);
> - } else if (msg->evt_type == LGSV_EVT_QUIESCED_ACK) {
> + } else if (msg->evt_type == LGSV_EVT_QUIESCED_ACK &&
> + lgs_cb->is_terminating_set == false) {
> proc_mds_quiesced_ack_msg(msg);
> } else if (msg->evt_type == LGSV_EVT_NO_OP) {
> TRACE("Jolted the main thread so it picks up the new IMM FD");
> @@ -1565,5 +1566,7 @@ void lgs_process_mbx(SYSF_MBX *mbx) {
> }
>
> lgs_evt_destroy(msg);
> + } else if (lgs_cb->is_terminating_set) {
> + lgs_cb->is_terminating_set = false;
> }
> }
> diff --git a/src/log/logd/lgs_main.cc b/src/log/logd/lgs_main.cc
> index fe2f9a2b8..fe0d31caf 100644
> --- a/src/log/logd/lgs_main.cc
> +++ b/src/log/logd/lgs_main.cc
> @@ -564,6 +564,14 @@ int main(int argc, char *argv[]) {
> }
>
> if (fds[FD_TERM].revents & POLLIN) {
> + // Process all requests in mailbox
> + lgs_cb->is_terminating_set = true;
> + if (fds[FD_MBX].revents & POLLIN) {
> + while (lgs_cb->is_terminating_set) {
> + lgs_process_mbx(&lgs_mbx);
> + }
> + }
> +
> daemon_exit();
> }
>
------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel