There are cases where opensafd startup is still ongoing when SMF sends
out a reboot command for a node. Because opensafd has taken a lock
(lock file) during startup, the reboot command is not able to call
"opensafd stop". To handle this, SMF polls "opensafd status" and waits
for the lock to be released. The waiting time is short (at most 5
retries, 2 seconds apart), and SMF continues with the reboot even if
the lock is not released.

Ticket #2459 allows SMF to query the status of opensafd.

- Refactor the SMF remote command into two versions:
  smfnd_exec_remote_cmd(), which logs errors returned by the endpoint
  command, and smfnd_remote_cmd(), which does no error logging.
---
 src/smf/smfd/SmfUpgradeStep.cc |  23 ++++++++++
 src/smf/smfd/smfd_smfnd.c      | 102 +++++++++++++++++++++++++----------------
 src/smf/smfd/smfd_smfnd.h      |   4 ++
 3 files changed, 90 insertions(+), 39 deletions(-)

diff --git a/src/smf/smfd/SmfUpgradeStep.cc b/src/smf/smfd/SmfUpgradeStep.cc
index 2ffeab1..fc54019 100644
--- a/src/smf/smfd/SmfUpgradeStep.cc
+++ b/src/smf/smfd/SmfUpgradeStep.cc
@@ -54,6 +54,7 @@
 #include "smf/smfd/SmfRollback.h"
 #include "smf/smfd/SmfUtils.h"
 #include "osaf/immutil/immutil.h"
+#include "osaf/configmake.h"
 #include "smf/smfd/smfd_smfnd.h"
 #include "smfd.h"
 #include "base/osaf_time.h"
@@ -2299,6 +2300,28 @@ bool SmfUpgradeStep::nodeReboot() {
       goto done;
     }
 
+    // Try to make sure opensafd is not in startup phase otherwise reboot will
+    // not trigger opensafd stop.
+    int counter = 0;
+    while (counter < 5) {
+      TRACE("checking status of opensafd");
+      std::string command = LSBINITDIR;
+      command += "/opensafd status";
+      cmdrc = smfnd_remote_cmd(command.c_str(), &nodeDest,
+                               cliTimeout, localTimeout);
+      if ((cmdrc  & 0xffff0000) == SMFSV_CMD_RESULT_CODE &&
+          (cmdrc & 0xffff) == 150) {
+          // The lockfile is taken, try again
+          LOG_WA("opensafd status, retcode[%u] retry in 2 seconds",
+                 cmdrc & 0xffff);
+          struct timespec time = {2, 0};
+          osaf_nanosleep(&time);
+          counter += 1;
+      } else {
+        break;
+      }
+    }
+
     /* When executing a reboot command on a node the command will never return
        so we want a short local timeout. Since the smfnd is handling the
        cli timeout we want that to be much longer so that the reboot command
diff --git a/src/smf/smfd/smfd_smfnd.c b/src/smf/smfd/smfd_smfnd.c
index 23770ef..7384637 100644
--- a/src/smf/smfd/smfd_smfnd.c
+++ b/src/smf/smfd/smfd_smfnd.c
@@ -55,8 +55,10 @@ static SaVersionT clmVersion = {'B', 1, 1};
 
 static pthread_mutex_t smfnd_list_lock = PTHREAD_MUTEX_INITIALIZER;
 
-static uint32_t smfnd_remote_cmd(const char *i_cmd, MDS_DEST i_smfnd_dest,
-                                uint32_t i_timeout);
+static uint32_t smfnd_legacy_remote_cmd(const char *i_cmd,
+                                        MDS_DEST i_smfnd_dest,
+                                        uint32_t i_timeout);
+void log_rsp_errors(const char *i_cmd, uint32_t i_timeout, uint32_t i_result);
 
 /* ========================================================================
  *   FUNCTION PROTOTYPES
@@ -240,6 +242,23 @@ uint32_t smfnd_down(SaClmNodeIdT i_node_id)
 uint32_t smfnd_exec_remote_cmd(const char *i_cmd, const SmfndNodeDest *i_smfnd,
                               uint32_t i_timeout, uint32_t i_localTimeout)
 {
+        uint32_t result = smfnd_remote_cmd(i_cmd, i_smfnd, i_timeout,
+                                           i_localTimeout);
+        log_rsp_errors(i_cmd, i_timeout, result);
+        return result;
+}
+
+/**
+ * smfnd_remote_cmd
+ * @param i_cmd Remote command to be executed
+ * @param i_smfnd Info about the smfnd node where to execute
+ *                     the command
+ * @param i_timeout Max time the command may take, in units of 10 ms
+ */
+uint32_t smfnd_remote_cmd(const char *i_cmd, const SmfndNodeDest *i_smfnd,
+                          uint32_t i_timeout, uint32_t i_localTimeout)
+
+{
        SMFSV_EVT cmd_req_asynch;
        SMFSV_EVT *cmd_rsp = 0;
        uint32_t rc;
@@ -270,7 +289,7 @@ uint32_t smfnd_exec_remote_cmd(const char *i_cmd, const SmfndNodeDest *i_smfnd,
        if (i_smfnd->rem_svc_pvt_ver == 1) {
                /* This addressed smfnd can only handle the old cmd req message
                 * format */
-               return smfnd_remote_cmd(i_cmd, i_smfnd->dest, i_timeout);
+               return smfnd_legacy_remote_cmd(i_cmd, i_smfnd->dest, i_timeout);
        }
 
        /* A new smfnd can handle the asynch message */
@@ -297,53 +316,58 @@ uint32_t smfnd_exec_remote_cmd(const char *i_cmd, const SmfndNodeDest *i_smfnd,
                return SMFSV_CMD_EXEC_FAILED;
        }
 
-       if (cmd_rsp->info.smfd.event.cmd_rsp.result != 0) { /* 0 = cmd OK */
-               switch (cmd_rsp->info.smfd.event.cmd_rsp.result & 0xffff0000) {
-               case SMFSV_CMD_EXEC_FAILED: {
-                       LOG_ER("Command %s failed to start (%u)", i_cmd,
-                              cmd_rsp->info.smfd.event.cmd_rsp.result &
-                                  0xffff);
-                       break;
-               }
-               case SMFSV_CMD_TIMEOUT: {
-                       LOG_ER("Command %s timed out (timeout %u ms)", i_cmd,
-                              i_timeout * 10);
-                       break;
-               }
-               case SMFSV_CMD_RESULT_CODE: {
-                       LOG_ER("Command %s returned error %u", i_cmd,
-                              cmd_rsp->info.smfd.event.cmd_rsp.result &
-                                  0xffff);
-                       break;
-               }
-               case SMFSV_CMD_SIGNAL_TERM: {
-                       LOG_ER("Command %s terminated by signal %u", i_cmd,
-                              cmd_rsp->info.smfd.event.cmd_rsp.result &
-                                  0xffff);
-                       break;
-               }
-               default: {
-                       LOG_ER("Command %s failed by unknown reason %x", i_cmd,
-                              cmd_rsp->info.smfd.event.cmd_rsp.result);
-                       break;
-               }
-               }
-       }
-
        rc = cmd_rsp->info.smfd.event.cmd_rsp.result;
        free(cmd_rsp);
        return rc;
 }
 
 /**
- * smfnd_remote_cmd
+ * @param i_cmd Name of remote command that was executed
+ * @param i_timeout Timeout used for the remote command, in units of 10 ms
+ * @param i_result Result code from smfnd_remote_cmd
+ */
+void log_rsp_errors(const char *i_cmd, uint32_t i_timeout, uint32_t i_result) {
+       if (i_result != 0) { /* 0 = cmd OK */
+               switch (i_result & 0xffff0000) {
+               case SMFSV_CMD_EXEC_FAILED: {
+                       LOG_ER("Command %s failed to start (%u)",
+                               i_cmd, i_result & 0xffff);
+                       break;
+               }
+               case SMFSV_CMD_TIMEOUT: {
+                       LOG_ER("Command %s timed out (timeout %u ms)",
+                               i_cmd, i_timeout * 10);
+                       break;
+               }
+               case SMFSV_CMD_RESULT_CODE: {
+                       LOG_ER("Command %s returned error %u",
+                               i_cmd, i_result & 0xffff);
+                       break;
+               }
+               case SMFSV_CMD_SIGNAL_TERM: {
+                       LOG_ER("Command %s terminated by signal %u",
+                               i_cmd, i_result & 0xffff);
+                       break;
+               }
+               default: {
+                       LOG_ER("Command %s failed by unknown reason %x",
+                               i_cmd, i_result);
+                       break;
+               }
+               } // switch
+       } // if
+}
+
+
+/**
+ * smfnd_legacy_remote_cmd
  * @param i_cmd Remote command to be executed
  * @param i_smfnd_dest Destination to the node where to execute
  *                     the command
  * @param i_timeout Max time the command may take
  */
-uint32_t smfnd_remote_cmd(const char *i_cmd, MDS_DEST i_smfnd_dest,
-                         uint32_t i_timeout)
+uint32_t smfnd_legacy_remote_cmd(const char *i_cmd, MDS_DEST i_smfnd_dest,
+                                uint32_t i_timeout)
 {
        SMFSV_EVT cmd_req;
        SMFSV_EVT *cmd_rsp = NULL;
diff --git a/src/smf/smfd/smfd_smfnd.h b/src/smf/smfd/smfd_smfnd.h
index 46892f8..2a8c2e9 100644
--- a/src/smf/smfd/smfd_smfnd.h
+++ b/src/smf/smfd/smfd_smfnd.h
@@ -80,6 +80,10 @@ uint32_t smfnd_down(SaClmNodeIdT node_id);
 bool smfnd_for_name(const char *i_nodeName, SmfndNodeDest *o_nodeDest);
 uint32_t smfnd_exec_remote_cmd(const char *i_cmd, const SmfndNodeDest *i_smfnd,
                                uint32_t i_timeout, uint32_t i_localTimeout);
+// Remote command without error logging for endpoint exit codes
+uint32_t smfnd_remote_cmd(const char *i_cmd, const SmfndNodeDest *i_smfnd,
+                          uint32_t i_timeout, uint32_t i_localTimeout);
+
 
 #ifdef __cplusplus
 }
-- 
2.7.4


------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to