Ack Thanks Lennart
> -----Original Message----- > From: Canh Van Truong > Sent: den 5 maj 2017 15:08 > To: Lennart Lund <lennart.l...@ericsson.com>; Vu Minh Nguyen > <vu.m.ngu...@dektech.com.au>; mahesh.va...@oracle.com > Cc: opensaf-devel@lists.sourceforge.net; Canh Van Truong > <canh.v.tru...@dektech.com.au> > Subject: [PATCH 1/1] log: fix agent gets TRY_AGAIN instead TIMEOUT during > failover [#2411] > > During testing, e.g. using saflogger in a loop to send log records to the log service > during failover, we frequently see saLogStreamClose() or > saLogFinalize() > return SA_AIS_ERR_TIMEOUT because the active LOG service is shut down while agent > requests > still remain in its mailbox. > > The close request has reached the active LOG service, meaning it has been put in the mailbox but > has not > yet been picked up for processing, because LOGsv has just received the TERM signal and, a > short > time later, kills itself. > > The LOG agent never gets the ACK response for the sync close request, and after the > 10s timeout expires, > MDS returns TIMEOUT to the log agent. 
> > The solution is that, before calling daemon_exit, the server iterates over all items > in its > mailbox and, > if an item is an agent request, sends a response with a TRY_AGAIN error to the agent before > shutting down. > --- > src/log/logd/lgs_cb.h | 1 + > src/log/logd/lgs_evt.cc | 25 ++++++++++++++----------- > src/log/logd/lgs_main.cc | 8 ++++++++ > 3 files changed, 23 insertions(+), 11 deletions(-) > > diff --git a/src/log/logd/lgs_cb.h b/src/log/logd/lgs_cb.h > index bfd2822cc..b2a29bad9 100644 > --- a/src/log/logd/lgs_cb.h > +++ b/src/log/logd/lgs_cb.h > @@ -81,6 +81,7 @@ typedef struct lgs_cb { > SaInvocationT > amf_invocation_id; /* AMF InvocationID - needed to handle Quiesed > state */ > bool is_quiesced_set; > + bool is_terminating_set; /* Flag for osaflogd are terminating */ > SaImmOiHandleT immOiHandle; /* IMM OI handle */ > SaSelectionObjectT > immSelectionObject; /* Selection Object to wait for IMM events */ > diff --git a/src/log/logd/lgs_evt.cc b/src/log/logd/lgs_evt.cc > index 98ca5f71c..7a0614b43 100644 > --- a/src/log/logd/lgs_evt.cc > +++ b/src/log/logd/lgs_evt.cc > @@ -710,8 +710,8 @@ static uint32_t proc_initialize_msg(lgs_cb_t *cb, > lgsv_lgs_evt_t *evt) { > TRACE_ENTER2("dest %" PRIx64, evt->fr_dest); > > // Client should try again when role changes is in transition. 
> - if (cb->is_quiesced_set) { > - TRACE("Log service is in quiesced state"); > + if (cb->is_quiesced_set || cb->is_terminating_set) { > + TRACE("Log service is in quiesced/terminating"); > ais_rc = SA_AIS_ERR_TRY_AGAIN; > goto snd_rsp; > } > @@ -1041,8 +1041,8 @@ static uint32_t proc_stream_open_msg(lgs_cb_t > *cb, lgsv_lgs_evt_t *evt) { > open_sync_param->client_id); > > // Client should try again when role changes is in transition. > - if (cb->is_quiesced_set) { > - TRACE("Log service is in quiesced state"); > + if (cb->is_quiesced_set || cb->is_terminating_set) { > + TRACE("Log service is in quiesced/terminating"); > ais_rv = SA_AIS_ERR_TRY_AGAIN; > goto snd_rsp; > } > @@ -1199,8 +1199,8 @@ static uint32_t proc_stream_close_msg(lgs_cb_t > *cb, lgsv_lgs_evt_t *evt) { > close_param->lstr_id); > > // Client should try again when role changes is in transition. > - if (cb->is_quiesced_set) { > - TRACE("Log service is in quiesced state"); > + if (cb->is_quiesced_set || cb->is_terminating_set) { > + TRACE("Log service is in quiesced/terminating"); > ais_rc = SA_AIS_ERR_TRY_AGAIN; > goto snd_rsp; > } > @@ -1299,8 +1299,8 @@ static uint32_t proc_write_log_async_msg(lgs_cb_t > *cb, lgsv_lgs_evt_t *evt) { > param->lstr_id, node_name); > > // Client should try again when role changes is in transition. 
> - if (cb->is_quiesced_set) { > - TRACE("Log service is in quiesced state"); > + if (cb->is_quiesced_set || cb->is_terminating_set) { > + TRACE("Log service is in quiesced/terminating"); > error = SA_AIS_ERR_TRY_AGAIN; > goto done; > } > @@ -1547,7 +1547,8 @@ void lgs_process_mbx(SYSF_MBX *mbx) { > if (lgs_cb->ha_state == SA_AMF_HA_ACTIVE) { > if (msg->evt_type <= LGSV_LGS_EVT_LGA_DOWN) { > lgs_lgsv_top_level_evt_dispatch_tbl[msg->evt_type](msg); > - } else if (msg->evt_type == LGSV_EVT_QUIESCED_ACK) { > + } else if (msg->evt_type == LGSV_EVT_QUIESCED_ACK && > + lgs_cb->is_terminating_set == false) { > proc_mds_quiesced_ack_msg(msg); > } else if (msg->evt_type == LGSV_EVT_NO_OP) { > TRACE("Jolted the main thread so it picks up the new IMM FD"); > @@ -1565,5 +1566,7 @@ void lgs_process_mbx(SYSF_MBX *mbx) { > } > > lgs_evt_destroy(msg); > + } else if (lgs_cb->is_terminating_set) { > + lgs_cb->is_terminating_set = false; > } > } > diff --git a/src/log/logd/lgs_main.cc b/src/log/logd/lgs_main.cc > index fe2f9a2b8..fe0d31caf 100644 > --- a/src/log/logd/lgs_main.cc > +++ b/src/log/logd/lgs_main.cc > @@ -564,6 +564,14 @@ int main(int argc, char *argv[]) { > } > > if (fds[FD_TERM].revents & POLLIN) { > + // Process all requests in mailbox > + lgs_cb->is_terminating_set = true; > + if (fds[FD_MBX].revents & POLLIN) { > + while (lgs_cb->is_terminating_set) { > + lgs_process_mbx(&lgs_mbx); > + } > + } > + > daemon_exit(); > } > > -- > 2.11.0 ------------------------------------------------------------------------------ Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel