During testing (e.g. running saflogger in a loop to send log records to the
log service during failover), we frequently see saLogStreamClose() or
saLogFinalize() return SA_AIS_ERR_TIMEOUT, because the active LOG service is
shut down while agent requests still remain in its mailbox.
The close request has reached the active LOG service, i.e. it has been put in
the mailbox, but it has not yet been picked up for processing because LOGsv
has just received the TERM signal and kills itself a short time later.
The LOG agent never gets the ACK response for its sync close request, and
after the 10s timeout expires, MDS returns TIMEOUT to the log agent.
The solution is that, before calling daemon_exit, the LOG server iterates over
all items in its mailbox; if an item is an agent request, the server sends a
response with the try-again error (SA_AIS_ERR_TRY_AGAIN) to the agent before
shutting down.
---
src/log/logd/lgs_cb.h | 1 +
src/log/logd/lgs_evt.cc | 25 ++++++++++++++-----------
src/log/logd/lgs_main.cc | 8 ++++++++
3 files changed, 23 insertions(+), 11 deletions(-)
diff --git a/src/log/logd/lgs_cb.h b/src/log/logd/lgs_cb.h
index bfd2822cc..b2a29bad9 100644
--- a/src/log/logd/lgs_cb.h
+++ b/src/log/logd/lgs_cb.h
@@ -81,6 +81,7 @@ typedef struct lgs_cb {
SaInvocationT
amf_invocation_id; /* AMF InvocationID - needed to handle Quiesed state
*/
bool is_quiesced_set;
+ bool is_terminating_set; /* True while osaflogd is terminating */
SaImmOiHandleT immOiHandle; /* IMM OI handle */
SaSelectionObjectT
immSelectionObject; /* Selection Object to wait for IMM events */
diff --git a/src/log/logd/lgs_evt.cc b/src/log/logd/lgs_evt.cc
index 98ca5f71c..7a0614b43 100644
--- a/src/log/logd/lgs_evt.cc
+++ b/src/log/logd/lgs_evt.cc
@@ -710,8 +710,8 @@ static uint32_t proc_initialize_msg(lgs_cb_t *cb,
lgsv_lgs_evt_t *evt) {
TRACE_ENTER2("dest %" PRIx64, evt->fr_dest);
// Client should try again when role changes is in transition.
- if (cb->is_quiesced_set) {
- TRACE("Log service is in quiesced state");
+ if (cb->is_quiesced_set || cb->is_terminating_set) {
+ TRACE("Log service is in quiesced/terminating");
ais_rc = SA_AIS_ERR_TRY_AGAIN;
goto snd_rsp;
}
@@ -774,8 +774,8 @@ static uint32_t proc_finalize_msg(lgs_cb_t *cb,
lgsv_lgs_evt_t *evt) {
TRACE_ENTER2("client_id %u", client_id);
// Client should try again when role changes is in transition.
- if (cb->is_quiesced_set) {
- TRACE("Log service is in quiesced state");
+ if (cb->is_quiesced_set || cb->is_terminating_set) {
+ TRACE("Log service is in quiesced/terminating");
ais_rc = SA_AIS_ERR_TRY_AGAIN;
goto snd_rsp;
}
@@ -1041,8 +1041,8 @@ static uint32_t proc_stream_open_msg(lgs_cb_t *cb,
lgsv_lgs_evt_t *evt) {
open_sync_param->client_id);
// Client should try again when role changes is in transition.
- if (cb->is_quiesced_set) {
- TRACE("Log service is in quiesced state");
+ if (cb->is_quiesced_set || cb->is_terminating_set) {
+ TRACE("Log service is in quiesced/terminating");
ais_rv = SA_AIS_ERR_TRY_AGAIN;
goto snd_rsp;
}
@@ -1199,8 +1199,8 @@ static uint32_t proc_stream_close_msg(lgs_cb_t *cb,
lgsv_lgs_evt_t *evt) {
close_param->lstr_id);
// Client should try again when role changes is in transition.
- if (cb->is_quiesced_set) {
- TRACE("Log service is in quiesced state");
+ if (cb->is_quiesced_set || cb->is_terminating_set) {
+ TRACE("Log service is in quiesced/terminating");
ais_rc = SA_AIS_ERR_TRY_AGAIN;
goto snd_rsp;
}
@@ -1299,8 +1299,8 @@ static uint32_t proc_write_log_async_msg(lgs_cb_t *cb,
lgsv_lgs_evt_t *evt) {
param->lstr_id, node_name);
// Client should try again when role changes is in transition.
- if (cb->is_quiesced_set) {
- TRACE("Log service is in quiesced state");
+ if (cb->is_quiesced_set || cb->is_terminating_set) {
+ TRACE("Log service is in quiesced/terminating");
error = SA_AIS_ERR_TRY_AGAIN;
goto done;
}
@@ -1547,7 +1547,8 @@ void lgs_process_mbx(SYSF_MBX *mbx) {
if (lgs_cb->ha_state == SA_AMF_HA_ACTIVE) {
if (msg->evt_type <= LGSV_LGS_EVT_LGA_DOWN) {
lgs_lgsv_top_level_evt_dispatch_tbl[msg->evt_type](msg);
- } else if (msg->evt_type == LGSV_EVT_QUIESCED_ACK) {
+ } else if (msg->evt_type == LGSV_EVT_QUIESCED_ACK &&
+ lgs_cb->is_terminating_set == false) {
proc_mds_quiesced_ack_msg(msg);
} else if (msg->evt_type == LGSV_EVT_NO_OP) {
TRACE("Jolted the main thread so it picks up the new IMM FD");
@@ -1565,5 +1566,7 @@ void lgs_process_mbx(SYSF_MBX *mbx) {
}
lgs_evt_destroy(msg);
+ } else if (lgs_cb->is_terminating_set) {
+ lgs_cb->is_terminating_set = false;
}
}
diff --git a/src/log/logd/lgs_main.cc b/src/log/logd/lgs_main.cc
index fe2f9a2b8..fe0d31caf 100644
--- a/src/log/logd/lgs_main.cc
+++ b/src/log/logd/lgs_main.cc
@@ -564,6 +564,14 @@ int main(int argc, char *argv[]) {
}
if (fds[FD_TERM].revents & POLLIN) {
+ // Process all requests in mailbox
+ lgs_cb->is_terminating_set = true;
+ if (fds[FD_MBX].revents & POLLIN) {
+ while (lgs_cb->is_terminating_set) {
+ lgs_process_mbx(&lgs_mbx);
+ }
+ }
+
daemon_exit();
}
--
2.11.0
------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel