Hi Canh Van,
On 5/5/2017 6:37 PM, Canh Van Truong wrote: > During testing, e.g: use saflogger in loop to send a log record to log service > during failover, we frequently encounter saLogStreamClose() or saLogFinalize() > get SA_AIS_ERR_TIMEOUT as active LOG service is shutdown while agent requests > still remain in mailbox. > > The close request has come to active LOG, means it is put to the mailbox but > not > yet pick up out for processing yet as LOGsv is just getting TERM signal, then > short > time later, it does kill itself. > > The LOG agent did not get the ACK response for sync close request, and after > 10s expired, > MDS returns TIMEOUT back to the log agent. > > The solution for this is that, before calling daemon_exit, iterate all items > in its mailbox, > if that is agent request, send response with try again error to agent before > going to shutdown An application interprets the two differently. It is true that in both cases the call is succeeded. Our current understanding is that SA_AIS_ERR_TRY_AGAIN is to be sent for a very short delay of service, some /milliseconds/. If SA_AIS_ERR_TIMEOUT comes, it means the destination is not reachable (and we don't have any concept of short-time non-reachability); this means the application should not retry within /milliseconds/. It should evaluate the event that occurred in the cluster, and based on that the application may or may not try again, so let the application handle the two differently. ===================================================================================================== SA_AIS_ERR_TIMEOUT - An implementation-dependent timeout occurred before the call could complete. It is unspecified whether the call succeeded or whether it did not. SA_AIS_ERR_TRY_AGAIN - The service cannot be provided at this time. The process may retry later. 
===================================================================================================== -AVM > --- > src/log/logd/lgs_cb.h | 1 + > src/log/logd/lgs_evt.cc | 25 ++++++++++++++----------- > src/log/logd/lgs_main.cc | 8 ++++++++ > 3 files changed, 23 insertions(+), 11 deletions(-) > > diff --git a/src/log/logd/lgs_cb.h b/src/log/logd/lgs_cb.h > index bfd2822cc..b2a29bad9 100644 > --- a/src/log/logd/lgs_cb.h > +++ b/src/log/logd/lgs_cb.h > @@ -81,6 +81,7 @@ typedef struct lgs_cb { > SaInvocationT > amf_invocation_id; /* AMF InvocationID - needed to handle Quiesed > state */ > bool is_quiesced_set; > + bool is_terminating_set; /* Flag for osaflogd are terminating */ > SaImmOiHandleT immOiHandle; /* IMM OI handle */ > SaSelectionObjectT > immSelectionObject; /* Selection Object to wait for IMM events */ > diff --git a/src/log/logd/lgs_evt.cc b/src/log/logd/lgs_evt.cc > index 98ca5f71c..7a0614b43 100644 > --- a/src/log/logd/lgs_evt.cc > +++ b/src/log/logd/lgs_evt.cc > @@ -710,8 +710,8 @@ static uint32_t proc_initialize_msg(lgs_cb_t *cb, > lgsv_lgs_evt_t *evt) { > TRACE_ENTER2("dest %" PRIx64, evt->fr_dest); > > // Client should try again when role changes is in transition. > - if (cb->is_quiesced_set) { > - TRACE("Log service is in quiesced state"); > + if (cb->is_quiesced_set || cb->is_terminating_set) { > + TRACE("Log service is in quiesced/terminating"); > ais_rc = SA_AIS_ERR_TRY_AGAIN; > goto snd_rsp; > } > @@ -774,8 +774,8 @@ static uint32_t proc_finalize_msg(lgs_cb_t *cb, > lgsv_lgs_evt_t *evt) { > TRACE_ENTER2("client_id %u", client_id); > > // Client should try again when role changes is in transition. 
> - if (cb->is_quiesced_set) { > - TRACE("Log service is in quiesced state"); > + if (cb->is_quiesced_set || cb->is_terminating_set) { > + TRACE("Log service is in quiesced/terminating"); > ais_rc = SA_AIS_ERR_TRY_AGAIN; > goto snd_rsp; > } > @@ -1041,8 +1041,8 @@ static uint32_t proc_stream_open_msg(lgs_cb_t *cb, > lgsv_lgs_evt_t *evt) { > open_sync_param->client_id); > > // Client should try again when role changes is in transition. > - if (cb->is_quiesced_set) { > - TRACE("Log service is in quiesced state"); > + if (cb->is_quiesced_set || cb->is_terminating_set) { > + TRACE("Log service is in quiesced/terminating"); > ais_rv = SA_AIS_ERR_TRY_AGAIN; > goto snd_rsp; > } > @@ -1199,8 +1199,8 @@ static uint32_t proc_stream_close_msg(lgs_cb_t *cb, > lgsv_lgs_evt_t *evt) { > close_param->lstr_id); > > // Client should try again when role changes is in transition. > - if (cb->is_quiesced_set) { > - TRACE("Log service is in quiesced state"); > + if (cb->is_quiesced_set || cb->is_terminating_set) { > + TRACE("Log service is in quiesced/terminating"); > ais_rc = SA_AIS_ERR_TRY_AGAIN; > goto snd_rsp; > } > @@ -1299,8 +1299,8 @@ static uint32_t proc_write_log_async_msg(lgs_cb_t *cb, > lgsv_lgs_evt_t *evt) { > param->lstr_id, node_name); > > // Client should try again when role changes is in transition. 
> - if (cb->is_quiesced_set) { > - TRACE("Log service is in quiesced state"); > + if (cb->is_quiesced_set || cb->is_terminating_set) { > + TRACE("Log service is in quiesced/terminating"); > error = SA_AIS_ERR_TRY_AGAIN; > goto done; > } > @@ -1547,7 +1547,8 @@ void lgs_process_mbx(SYSF_MBX *mbx) { > if (lgs_cb->ha_state == SA_AMF_HA_ACTIVE) { > if (msg->evt_type <= LGSV_LGS_EVT_LGA_DOWN) { > lgs_lgsv_top_level_evt_dispatch_tbl[msg->evt_type](msg); > - } else if (msg->evt_type == LGSV_EVT_QUIESCED_ACK) { > + } else if (msg->evt_type == LGSV_EVT_QUIESCED_ACK && > + lgs_cb->is_terminating_set == false) { > proc_mds_quiesced_ack_msg(msg); > } else if (msg->evt_type == LGSV_EVT_NO_OP) { > TRACE("Jolted the main thread so it picks up the new IMM FD"); > @@ -1565,5 +1566,7 @@ void lgs_process_mbx(SYSF_MBX *mbx) { > } > > lgs_evt_destroy(msg); > + } else if (lgs_cb->is_terminating_set) { > + lgs_cb->is_terminating_set = false; > } > } > diff --git a/src/log/logd/lgs_main.cc b/src/log/logd/lgs_main.cc > index fe2f9a2b8..fe0d31caf 100644 > --- a/src/log/logd/lgs_main.cc > +++ b/src/log/logd/lgs_main.cc > @@ -564,6 +564,14 @@ int main(int argc, char *argv[]) { > } > > if (fds[FD_TERM].revents & POLLIN) { > + // Process all requests in mailbox > + lgs_cb->is_terminating_set = true; > + if (fds[FD_MBX].revents & POLLIN) { > + while (lgs_cb->is_terminating_set) { > + lgs_process_mbx(&lgs_mbx); > + } > + } > + > daemon_exit(); > } > ------------------------------------------------------------------------------ Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel