On 10/04/2017 11:35 AM, Hans Nordebäck wrote:
Hi Rafael,

I have one question on this patch below. /Thanks HansN

-----Original Message-----
From: Lennart Lund [mailto:[email protected]]
Sent: den 25 september 2017 13:44
To: Rafael Odzakow <[email protected]>
Cc: [email protected]
Subject: Re: [devel] [PATCH 1/1] smf: try to wait for opensafd status before 
reboot [#2464]

Ack

Thanks
Lennart

-----Original Message-----
From: Rafael Odzakow
Sent: den 14 september 2017 13:34
To: Lennart Lund <[email protected]>
Cc: [email protected]; Rafael Odzakow
<[email protected]>
Subject: [PATCH 1/1] smf: try to wait for opensafd status before
reboot [#2464]

There are cases when opensafd startup is still ongoing and SMF will
send out a reboot command for a node. Because opensafd has taken a
lock the reboot command will not be able to call opensafd stop. It is
suggested that SMF tries to wait for the release of the lock with
"opensafd status". The waiting time is short and SMF continues with
reboot even if the lock is not released.

- Change smf remote command to have two versions, one that logs errors of
   the endpoint command and one without error logging.
---
  src/smf/smfd/SmfUpgradeStep.cc |  24 ++++++++++
  src/smf/smfd/smfd_smfnd.c      | 100 ++++++++++++++++++++++++++------
---------
  src/smf/smfd/smfd_smfnd.h      |   8 +++-
  3 files changed, 95 insertions(+), 37 deletions(-)

diff --git a/src/smf/smfd/SmfUpgradeStep.cc
b/src/smf/smfd/SmfUpgradeStep.cc index ddd96a0..d40fac4 100644
--- a/src/smf/smfd/SmfUpgradeStep.cc
+++ b/src/smf/smfd/SmfUpgradeStep.cc
@@ -54,6 +54,7 @@
  #include "smf/smfd/SmfRollback.h"
  #include "smf/smfd/SmfUtils.h"
  #include "osaf/immutil/immutil.h"
+#include "osaf/configmake.h"
  #include "smf/smfd/smfd_smfnd.h"
  #include "smfd.h"
  #include "base/osaf_time.h"
@@ -2316,6 +2317,29 @@ bool SmfUpgradeStep::nodeReboot() {
        goto done;
      }

+    // Wait for a good opensafd status. If retrying fails give up and reboot
+    // anyway.
+    int counter = 0;
+    while (counter < 5) {
+      TRACE("check status of opensafd");
+      std::string command = LSBINITDIR;
+      command += "/opensafd status";
+      cmdrc = smfnd_exec_remote_cmdnolog(command.c_str(), &nodeDest,
+                                         cliTimeout, localTimeout);
[HansN]  is the below code correct, shouldn't it be the following?:
       if ((cmdrc  & 0xffff0000) == SMFSV_CMD_RESULT_CODE &&
           (cmdrc & 0xffff) == 0) {
         LOG_NO("opensafd status OK");
         break;
       } else {
          // The status is not OK, try again
           LOG_WA("opensafd status, retcode[%u] retry in 2 seconds",
                  cmdrc & 0xffff);
           struct timespec time = {2, 0};
           osaf_nanosleep(&time);
           counter += 1;
       }
Instead of:
+      if ((cmdrc  & 0xffff0000) == SMFSV_CMD_RESULT_CODE &&
+          (cmdrc & 0xffff) != 0) {
+          // The status is not OK, try again
+          LOG_WA("opensafd status, retcode[%u] retry in 2 seconds",
+                 cmdrc & 0xffff);
+          struct timespec time = {2, 0};
+          osaf_nanosleep(&time);
+          counter += 1;
+      } else {
+        LOG_NO("opensafd status OK");
+        break;
+      }
+    }
+
 [Rafael] It is true that MDS errors are not handled for this call. If MDS issues arise this function continues to a reboot directly which is actually preferable. Having a MDS errors here means that something is already wrong with the system so there is no use to hang around for a good state. Improvement is to add an extra check before logging OK and to add which node is affected.

      /* When executing a reboot command on a node the command will
never return
         so we want a short local timeout. Since the smfnd is handling the
         cli timeout we want that to be much longer so that the reboot
command diff --git a/src/smf/smfd/smfd_smfnd.c
b/src/smf/smfd/smfd_smfnd.c index 7aadaf9..c48adb2 100644
--- a/src/smf/smfd/smfd_smfnd.c
+++ b/src/smf/smfd/smfd_smfnd.c
@@ -230,6 +230,44 @@ uint32_t smfnd_down(SaClmNodeIdT i_node_id)
        return NCSCC_RC_FAILURE;
  }

+/*
+   Log errors from smfnd_main_remote_command
+ * @param i_cmd Name of remote command that was executed
+ * @param i_timeout Max time out for the remote command in 10 ms
+ * @param i_result Result code from smfnd_main_remote_cmd */ void
+log_rsp_errors(const char *i_cmd, uint32_t i_timeout, uint32_t
i_result) {
+  if (i_result != 0) { /* 0 = cmd OK */
+    switch (i_result & 0xffff0000) {
+      case SMFSV_CMD_EXEC_FAILED: {
+        LOG_ER("Command %s failed to start (%u)",
+               i_cmd, i_result & 0xffff);
+        break;
+      }
+      case SMFSV_CMD_TIMEOUT: {
+        LOG_ER("Command %s timed out (timeout %u ms)",
+               i_cmd, i_timeout * 10);
+        break;
+      }
+      case SMFSV_CMD_RESULT_CODE: {
+        LOG_ER("Command %s returned error %u",
+               i_cmd, i_result & 0xffff);
+        break;
+      }
+      case SMFSV_CMD_SIGNAL_TERM: {
+        LOG_ER("Command %s terminated by signal %u",
+               i_cmd, i_result & 0xffff);
+        break;
+      }
+      default: {
+        LOG_ER("Command %s failed by unknown reason %x",
+               i_cmd, i_result);
+        break;
+      }
+    } // switch
+  } // if
+}
+
  /**
   * smfnd_exec_remote_cmd
   * @param i_cmd Remote command to be executed @@ -237,9 +275,11 @@
uint32_t smfnd_down(SaClmNodeIdT i_node_id)
   *                     the command
   * @param i_timeout Max time the command may take in 10 ms
   */
-uint32_t smfnd_exec_remote_cmd(const char *i_cmd, const SmfndNodeDest
*i_smfnd,
-                              uint32_t i_timeout, uint32_t i_localTimeout)
-{
+uint32_t smfnd_main_remote_cmd(const char *i_cmd,
+                               const SmfndNodeDest *i_smfnd,
+                               uint32_t i_timeout,
+                               uint32_t i_localTimeout,
+                               bool error_log) {
        SMFSV_EVT cmd_req_asynch;
        SMFSV_EVT *cmd_rsp = 0;
        uint32_t rc;
@@ -297,44 +337,32 @@ uint32_t smfnd_exec_remote_cmd(const char
*i_cmd, const SmfndNodeDest *i_smfnd,
                return SMFSV_CMD_EXEC_FAILED;
        }

-       if (cmd_rsp->info.smfd.event.cmd_rsp.result != 0) { /* 0 = cmd OK */
-               switch (cmd_rsp->info.smfd.event.cmd_rsp.result &
0xffff0000) {
-               case SMFSV_CMD_EXEC_FAILED: {
-                       LOG_ER("Command %s failed to start (%u)", i_cmd,
-                              cmd_rsp->info.smfd.event.cmd_rsp.result &
-                                  0xffff);
-                       break;
-               }
-               case SMFSV_CMD_TIMEOUT: {
-                       LOG_ER("Command %s timed out (timeout %u ms)",
i_cmd,
-                              i_timeout * 10);
-                       break;
-               }
-               case SMFSV_CMD_RESULT_CODE: {
-                       LOG_ER("Command %s returned error %u", i_cmd,
-                              cmd_rsp->info.smfd.event.cmd_rsp.result &
-                                  0xffff);
-                       break;
-               }
-               case SMFSV_CMD_SIGNAL_TERM: {
-                       LOG_ER("Command %s terminated by signal %u",
i_cmd,
-                              cmd_rsp->info.smfd.event.cmd_rsp.result &
-                                  0xffff);
-                       break;
-               }
-               default: {
-                       LOG_ER("Command %s failed by unknown reason
%x", i_cmd,
-                              cmd_rsp->info.smfd.event.cmd_rsp.result);
-                       break;
-               }
-               }
-       }
-
        rc = cmd_rsp->info.smfd.event.cmd_rsp.result;
+
+        if (error_log) {
+          log_rsp_errors(i_cmd, i_timeout, rc);
+        }
+
        free(cmd_rsp);
        return rc;
  }

+uint32_t smfnd_exec_remote_cmd(const char *i_cmd,
+                               const SmfndNodeDest *i_smfnd,
+                              uint32_t i_timeout,
+                               uint32_t i_localTimeout) {
+  return smfnd_main_remote_cmd(i_cmd, i_smfnd,
+                               i_timeout, i_localTimeout, true); }
+
+uint32_t smfnd_exec_remote_cmdnolog(const char *i_cmd,
+                                    const SmfndNodeDest *i_smfnd,
+                                    uint32_t i_timeout,
+                                    uint32_t i_localTimeout) {
+  return smfnd_main_remote_cmd(i_cmd, i_smfnd,
+                               i_timeout, i_localTimeout, false); }
+
  /**
   * smfnd_remote_cmd
   * @param i_cmd Remote command to be executed diff --git
a/src/smf/smfd/smfd_smfnd.h b/src/smf/smfd/smfd_smfnd.h index
6c227a5..fed706f 100644
--- a/src/smf/smfd/smfd_smfnd.h
+++ b/src/smf/smfd/smfd_smfnd.h
@@ -80,7 +80,13 @@ uint32_t smfnd_down(SaClmNodeIdT node_id);  bool
smfnd_for_name(const char *i_nodeName, SmfndNodeDest *o_nodeDest);
uint32_t smfnd_exec_remote_cmd(const char *i_cmd, const SmfndNodeDest
*i_smfnd,
                                 uint32_t i_timeout, uint32_t
i_localTimeout);
-
+/*
+ * Execute remote command without logging the errors of the remote
command.
+*/
+uint32_t smfnd_exec_remote_cmdnolog(const char *i_cmd,
+                                    const SmfndNodeDest *i_smfnd,
+                                    uint32_t i_timeout,
+                                    uint32_t i_localTimeout);
  #ifdef __cplusplus
  }
  #endif
--
2.7.4

------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most engaging tech 
sites, Slashdot.org! http://sdm.link/slashdot 
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel



------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to