Ack

Thanks
Lennart

> -----Original Message-----
> From: Rafael Odzakow
> Sent: den 14 september 2017 13:34
> To: Lennart Lund <lennart.l...@ericsson.com>
> Cc: opensaf-devel@lists.sourceforge.net; Rafael Odzakow
> <rafael.odza...@ericsson.com>
> Subject: [PATCH 1/1] smf: try to wait for opensafd status before reboot
> [#2464]
> 
> There are cases when opensafd startup is still ongoing and SMF will send
> out a reboot command for a node. Because opensafd has taken a lock the
> reboot command will not be able to call opensafd stop. It is suggested
> that SMF tries to wait for the release of the lock with "opensafd
> status". The waiting time is short and SMF continues with reboot even if
> the lock is not released.
> 
> - Change smf remote command to have two versions, one that logs errors of
>   the endpoint command and one without error logging.
> ---
>  src/smf/smfd/SmfUpgradeStep.cc |  24 ++++++++++
>  src/smf/smfd/smfd_smfnd.c      | 100 ++++++++++++++++++++++++++------
> ---------
>  src/smf/smfd/smfd_smfnd.h      |   8 +++-
>  3 files changed, 95 insertions(+), 37 deletions(-)
> 
> diff --git a/src/smf/smfd/SmfUpgradeStep.cc
> b/src/smf/smfd/SmfUpgradeStep.cc
> index ddd96a0..d40fac4 100644
> --- a/src/smf/smfd/SmfUpgradeStep.cc
> +++ b/src/smf/smfd/SmfUpgradeStep.cc
> @@ -54,6 +54,7 @@
>  #include "smf/smfd/SmfRollback.h"
>  #include "smf/smfd/SmfUtils.h"
>  #include "osaf/immutil/immutil.h"
> +#include "osaf/configmake.h"
>  #include "smf/smfd/smfd_smfnd.h"
>  #include "smfd.h"
>  #include "base/osaf_time.h"
> @@ -2316,6 +2317,29 @@ bool SmfUpgradeStep::nodeReboot() {
>        goto done;
>      }
> 
> +    // Wait for a good opensafd status. If retrying fails give up and reboot
> +    // anyway.
> +    int counter = 0;
> +    while (counter < 5) {
> +      TRACE("check status of opensafd");
> +      std::string command = LSBINITDIR;
> +      command += "/opensafd status";
> +      cmdrc = smfnd_exec_remote_cmdnolog(command.c_str(), &nodeDest,
> +                                         cliTimeout, localTimeout);
> +      if ((cmdrc  & 0xffff0000) == SMFSV_CMD_RESULT_CODE &&
> +          (cmdrc & 0xffff) != 0) {
> +          // The status is not OK, try again
> +          LOG_WA("opensafd status, retcode[%u] retry in 2 seconds",
> +                 cmdrc & 0xffff);
> +          struct timespec time = {2, 0};
> +          osaf_nanosleep(&time);
> +          counter += 1;
> +      } else {
> +        LOG_NO("opensafd status OK");
> +        break;
> +      }
> +    }
> +
>      /* When executing a reboot command on a node the command will never
> return
>         so we want a short local timeout. Since the smfnd is handling the
>         cli timeout we want that to be much longer so that the reboot command
> diff --git a/src/smf/smfd/smfd_smfnd.c b/src/smf/smfd/smfd_smfnd.c
> index 7aadaf9..c48adb2 100644
> --- a/src/smf/smfd/smfd_smfnd.c
> +++ b/src/smf/smfd/smfd_smfnd.c
> @@ -230,6 +230,44 @@ uint32_t smfnd_down(SaClmNodeIdT i_node_id)
>       return NCSCC_RC_FAILURE;
>  }
> 
> +/*
> +   Log errors from smfnd_main_remote_command
> + * @param i_cmd Name of remote command that was executed
> + * @param i_timeout Max time out for the remote command in 10 ms
> + * @param i_result Result code from smfnd_main_remote_cmd
> +*/
> +void log_rsp_errors(const char *i_cmd, uint32_t i_timeout, uint32_t
> i_result) {
> +  if (i_result != 0) { /* 0 = cmd OK */
> +    switch (i_result & 0xffff0000) {
> +      case SMFSV_CMD_EXEC_FAILED: {
> +        LOG_ER("Command %s failed to start (%u)",
> +               i_cmd, i_result & 0xffff);
> +        break;
> +      }
> +      case SMFSV_CMD_TIMEOUT: {
> +        LOG_ER("Command %s timed out (timeout %u ms)",
> +               i_cmd, i_timeout * 10);
> +        break;
> +      }
> +      case SMFSV_CMD_RESULT_CODE: {
> +        LOG_ER("Command %s returned error %u",
> +               i_cmd, i_result & 0xffff);
> +        break;
> +      }
> +      case SMFSV_CMD_SIGNAL_TERM: {
> +        LOG_ER("Command %s terminated by signal %u",
> +               i_cmd, i_result & 0xffff);
> +        break;
> +      }
> +      default: {
> +        LOG_ER("Command %s failed by unknown reason %x",
> +               i_cmd, i_result);
> +        break;
> +      }
> +    } // switch
> +  } // if
> +}
> +
>  /**
>   * smfnd_exec_remote_cmd
>   * @param i_cmd Remote command to be executed
> @@ -237,9 +275,11 @@ uint32_t smfnd_down(SaClmNodeIdT i_node_id)
>   *                     the command
>   * @param i_timeout Max time the command may take in 10 ms
>   */
> -uint32_t smfnd_exec_remote_cmd(const char *i_cmd, const
> SmfndNodeDest *i_smfnd,
> -                            uint32_t i_timeout, uint32_t i_localTimeout)
> -{
> +uint32_t smfnd_main_remote_cmd(const char *i_cmd,
> +                               const SmfndNodeDest *i_smfnd,
> +                               uint32_t i_timeout,
> +                               uint32_t i_localTimeout,
> +                               bool error_log) {
>       SMFSV_EVT cmd_req_asynch;
>       SMFSV_EVT *cmd_rsp = 0;
>       uint32_t rc;
> @@ -297,44 +337,32 @@ uint32_t smfnd_exec_remote_cmd(const char
> *i_cmd, const SmfndNodeDest *i_smfnd,
>               return SMFSV_CMD_EXEC_FAILED;
>       }
> 
> -     if (cmd_rsp->info.smfd.event.cmd_rsp.result != 0) { /* 0 = cmd OK */
> -             switch (cmd_rsp->info.smfd.event.cmd_rsp.result &
> 0xffff0000) {
> -             case SMFSV_CMD_EXEC_FAILED: {
> -                     LOG_ER("Command %s failed to start (%u)", i_cmd,
> -                            cmd_rsp->info.smfd.event.cmd_rsp.result &
> -                                0xffff);
> -                     break;
> -             }
> -             case SMFSV_CMD_TIMEOUT: {
> -                     LOG_ER("Command %s timed out (timeout %u ms)",
> i_cmd,
> -                            i_timeout * 10);
> -                     break;
> -             }
> -             case SMFSV_CMD_RESULT_CODE: {
> -                     LOG_ER("Command %s returned error %u", i_cmd,
> -                            cmd_rsp->info.smfd.event.cmd_rsp.result &
> -                                0xffff);
> -                     break;
> -             }
> -             case SMFSV_CMD_SIGNAL_TERM: {
> -                     LOG_ER("Command %s terminated by signal %u",
> i_cmd,
> -                            cmd_rsp->info.smfd.event.cmd_rsp.result &
> -                                0xffff);
> -                     break;
> -             }
> -             default: {
> -                     LOG_ER("Command %s failed by unknown reason
> %x", i_cmd,
> -                            cmd_rsp->info.smfd.event.cmd_rsp.result);
> -                     break;
> -             }
> -             }
> -     }
> -
>       rc = cmd_rsp->info.smfd.event.cmd_rsp.result;
> +
> +        if (error_log) {
> +          log_rsp_errors(i_cmd, i_timeout, rc);
> +        }
> +
>       free(cmd_rsp);
>       return rc;
>  }
> 
> +uint32_t smfnd_exec_remote_cmd(const char *i_cmd,
> +                               const SmfndNodeDest *i_smfnd,
> +                            uint32_t i_timeout,
> +                               uint32_t i_localTimeout) {
> +  return smfnd_main_remote_cmd(i_cmd, i_smfnd,
> +                               i_timeout, i_localTimeout, true);
> +}
> +
> +uint32_t smfnd_exec_remote_cmdnolog(const char *i_cmd,
> +                                    const SmfndNodeDest *i_smfnd,
> +                                    uint32_t i_timeout,
> +                                    uint32_t i_localTimeout) {
> +  return smfnd_main_remote_cmd(i_cmd, i_smfnd,
> +                               i_timeout, i_localTimeout, false);
> +}
> +
>  /**
>   * smfnd_remote_cmd
>   * @param i_cmd Remote command to be executed
> diff --git a/src/smf/smfd/smfd_smfnd.h b/src/smf/smfd/smfd_smfnd.h
> index 6c227a5..fed706f 100644
> --- a/src/smf/smfd/smfd_smfnd.h
> +++ b/src/smf/smfd/smfd_smfnd.h
> @@ -80,7 +80,13 @@ uint32_t smfnd_down(SaClmNodeIdT node_id);
>  bool smfnd_for_name(const char *i_nodeName, SmfndNodeDest
> *o_nodeDest);
>  uint32_t smfnd_exec_remote_cmd(const char *i_cmd, const
> SmfndNodeDest *i_smfnd,
>                                 uint32_t i_timeout, uint32_t i_localTimeout);
> -
> +/*
> + * Execute remote command without logging the errors of the remote
> command.
> +*/
> +uint32_t smfnd_exec_remote_cmdnolog(const char *i_cmd,
> +                                    const SmfndNodeDest *i_smfnd,
> +                                    uint32_t i_timeout,
> +                                    uint32_t i_localTimeout);
>  #ifdef __cplusplus
>  }
>  #endif
> --
> 2.7.4


------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to