Ack.

Thanks,
Lennart
> -----Original Message----- > From: Rafael Odzakow > Sent: den 14 september 2017 13:34 > To: Lennart Lund <lennart.l...@ericsson.com> > Cc: opensaf-devel@lists.sourceforge.net; Rafael Odzakow > <rafael.odza...@ericsson.com> > Subject: [PATCH 1/1] smf: try to wait for opensafd status before reboot > [#2464] > > There are cases when opensafd startup is still ongoing and SMF will send > out a reboot command for a node. Because opensafd has taken a lock the > reboot command will not be able to call opensafd stop. It is suggested > that SMF tries to wait for the release of the lock with "opensafd > status". The waiting time is short and SMF continues with reboot even if > the lock is not released. > > - Change smf remote command to have two versions, one that logs errors of > the endpoint command and one without error logging. > --- > src/smf/smfd/SmfUpgradeStep.cc | 24 ++++++++++ > src/smf/smfd/smfd_smfnd.c | 100 ++++++++++++++++++++++++++------ > --------- > src/smf/smfd/smfd_smfnd.h | 8 +++- > 3 files changed, 95 insertions(+), 37 deletions(-) > > diff --git a/src/smf/smfd/SmfUpgradeStep.cc > b/src/smf/smfd/SmfUpgradeStep.cc > index ddd96a0..d40fac4 100644 > --- a/src/smf/smfd/SmfUpgradeStep.cc > +++ b/src/smf/smfd/SmfUpgradeStep.cc > @@ -54,6 +54,7 @@ > #include "smf/smfd/SmfRollback.h" > #include "smf/smfd/SmfUtils.h" > #include "osaf/immutil/immutil.h" > +#include "osaf/configmake.h" > #include "smf/smfd/smfd_smfnd.h" > #include "smfd.h" > #include "base/osaf_time.h" > @@ -2316,6 +2317,29 @@ bool SmfUpgradeStep::nodeReboot() { > goto done; > } > > + // Wait for a good opensafd status. If retrying fails give up and reboot > + // anyway. 
> + int counter = 0; > + while (counter < 5) { > + TRACE("check status of opensafd"); > + std::string command = LSBINITDIR; > + command += "/opensafd status"; > + cmdrc = smfnd_exec_remote_cmdnolog(command.c_str(), &nodeDest, > + cliTimeout, localTimeout); > + if ((cmdrc & 0xffff0000) == SMFSV_CMD_RESULT_CODE && > + (cmdrc & 0xffff) != 0) { > + // The status is not OK, try again > + LOG_WA("opensafd status, retcode[%u] retry in 2 seconds", > + cmdrc & 0xffff); > + struct timespec time = {2, 0}; > + osaf_nanosleep(&time); > + counter += 1; > + } else { > + LOG_NO("opensafd status OK"); > + break; > + } > + } > + > /* When executing a reboot command on a node the command will never > return > so we want a short local timeout. Since the smfnd is handling the > cli timeout we want that to be much longer so that the reboot command > diff --git a/src/smf/smfd/smfd_smfnd.c b/src/smf/smfd/smfd_smfnd.c > index 7aadaf9..c48adb2 100644 > --- a/src/smf/smfd/smfd_smfnd.c > +++ b/src/smf/smfd/smfd_smfnd.c > @@ -230,6 +230,44 @@ uint32_t smfnd_down(SaClmNodeIdT i_node_id) > return NCSCC_RC_FAILURE; > } > > +/* > + Log errors from smfnd_main_remote_command > + * @param i_cmd Name of remote command that was executed > + * @param i_timeout Max time out for the remote command in 10 ms > + * @param i_result Result code from smfnd_main_remote_cmd > +*/ > +void log_rsp_errors(const char *i_cmd, uint32_t i_timeout, uint32_t > i_result) { > + if (i_result != 0) { /* 0 = cmd OK */ > + switch (i_result & 0xffff0000) { > + case SMFSV_CMD_EXEC_FAILED: { > + LOG_ER("Command %s failed to start (%u)", > + i_cmd, i_result & 0xffff); > + break; > + } > + case SMFSV_CMD_TIMEOUT: { > + LOG_ER("Command %s timed out (timeout %u ms)", > + i_cmd, i_timeout * 10); > + break; > + } > + case SMFSV_CMD_RESULT_CODE: { > + LOG_ER("Command %s returned error %u", > + i_cmd, i_result & 0xffff); > + break; > + } > + case SMFSV_CMD_SIGNAL_TERM: { > + LOG_ER("Command %s terminated by signal %u", > + i_cmd, i_result 
& 0xffff); > + break; > + } > + default: { > + LOG_ER("Command %s failed by unknown reason %x", > + i_cmd, i_result); > + break; > + } > + } // switch > + } // if > +} > + > /** > * smfnd_exec_remote_cmd > * @param i_cmd Remote command to be executed > @@ -237,9 +275,11 @@ uint32_t smfnd_down(SaClmNodeIdT i_node_id) > * the command > * @param i_timeout Max time the command may take in 10 ms > */ > -uint32_t smfnd_exec_remote_cmd(const char *i_cmd, const > SmfndNodeDest *i_smfnd, > - uint32_t i_timeout, uint32_t i_localTimeout) > -{ > +uint32_t smfnd_main_remote_cmd(const char *i_cmd, > + const SmfndNodeDest *i_smfnd, > + uint32_t i_timeout, > + uint32_t i_localTimeout, > + bool error_log) { > SMFSV_EVT cmd_req_asynch; > SMFSV_EVT *cmd_rsp = 0; > uint32_t rc; > @@ -297,44 +337,32 @@ uint32_t smfnd_exec_remote_cmd(const char > *i_cmd, const SmfndNodeDest *i_smfnd, > return SMFSV_CMD_EXEC_FAILED; > } > > - if (cmd_rsp->info.smfd.event.cmd_rsp.result != 0) { /* 0 = cmd OK */ > - switch (cmd_rsp->info.smfd.event.cmd_rsp.result & > 0xffff0000) { > - case SMFSV_CMD_EXEC_FAILED: { > - LOG_ER("Command %s failed to start (%u)", i_cmd, > - cmd_rsp->info.smfd.event.cmd_rsp.result & > - 0xffff); > - break; > - } > - case SMFSV_CMD_TIMEOUT: { > - LOG_ER("Command %s timed out (timeout %u ms)", > i_cmd, > - i_timeout * 10); > - break; > - } > - case SMFSV_CMD_RESULT_CODE: { > - LOG_ER("Command %s returned error %u", i_cmd, > - cmd_rsp->info.smfd.event.cmd_rsp.result & > - 0xffff); > - break; > - } > - case SMFSV_CMD_SIGNAL_TERM: { > - LOG_ER("Command %s terminated by signal %u", > i_cmd, > - cmd_rsp->info.smfd.event.cmd_rsp.result & > - 0xffff); > - break; > - } > - default: { > - LOG_ER("Command %s failed by unknown reason > %x", i_cmd, > - cmd_rsp->info.smfd.event.cmd_rsp.result); > - break; > - } > - } > - } > - > rc = cmd_rsp->info.smfd.event.cmd_rsp.result; > + > + if (error_log) { > + log_rsp_errors(i_cmd, i_timeout, rc); > + } > + > free(cmd_rsp); > return rc; > } > > 
+uint32_t smfnd_exec_remote_cmd(const char *i_cmd, > + const SmfndNodeDest *i_smfnd, > + uint32_t i_timeout, > + uint32_t i_localTimeout) { > + return smfnd_main_remote_cmd(i_cmd, i_smfnd, > + i_timeout, i_localTimeout, true); > +} > + > +uint32_t smfnd_exec_remote_cmdnolog(const char *i_cmd, > + const SmfndNodeDest *i_smfnd, > + uint32_t i_timeout, > + uint32_t i_localTimeout) { > + return smfnd_main_remote_cmd(i_cmd, i_smfnd, > + i_timeout, i_localTimeout, false); > +} > + > /** > * smfnd_remote_cmd > * @param i_cmd Remote command to be executed > diff --git a/src/smf/smfd/smfd_smfnd.h b/src/smf/smfd/smfd_smfnd.h > index 6c227a5..fed706f 100644 > --- a/src/smf/smfd/smfd_smfnd.h > +++ b/src/smf/smfd/smfd_smfnd.h > @@ -80,7 +80,13 @@ uint32_t smfnd_down(SaClmNodeIdT node_id); > bool smfnd_for_name(const char *i_nodeName, SmfndNodeDest > *o_nodeDest); > uint32_t smfnd_exec_remote_cmd(const char *i_cmd, const > SmfndNodeDest *i_smfnd, > uint32_t i_timeout, uint32_t i_localTimeout); > - > +/* > + * Execute remote command without logging the errors of the remote > command. > +*/ > +uint32_t smfnd_exec_remote_cmdnolog(const char *i_cmd, > + const SmfndNodeDest *i_smfnd, > + uint32_t i_timeout, > + uint32_t i_localTimeout); > #ifdef __cplusplus > } > #endif > -- > 2.7.4 ------------------------------------------------------------------------------ Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel