The function getNodeDestination() resets elapsedTime to zero on every call, so the node reboot timeout in waitForNodeDestination() is never reached. If a rebooted node never comes back, the campaign stays stuck in the executing state forever, until the cluster is reset. --- src/smf/smfd/SmfUpgradeStep.cc | 1 + src/smf/smfd/SmfUtils.cc | 11 ++++------- 2 files changed, 5 insertions(+), 7 deletions(-)
diff --git a/src/smf/smfd/SmfUpgradeStep.cc b/src/smf/smfd/SmfUpgradeStep.cc index 4c0ddd192..80da668de 100644 --- a/src/smf/smfd/SmfUpgradeStep.cc +++ b/src/smf/smfd/SmfUpgradeStep.cc @@ -2399,6 +2399,7 @@ bool SmfUpgradeStep::nodeReboot() { "SmfUpgradeStep::nodeReboot: Waiting to get node destination with increased UP counter"); while (true) { + elapsedTime = 0; for (nodeIt = rebootedNodeList.begin(); nodeIt != rebootedNodeList.end();) { if (getNodeDestination((*nodeIt).node_name, &nodeDest, &elapsedTime, -1)) { diff --git a/src/smf/smfd/SmfUtils.cc b/src/smf/smfd/SmfUtils.cc index 915c086a5..4ac5af163 100644 --- a/src/smf/smfd/SmfUtils.cc +++ b/src/smf/smfd/SmfUtils.cc @@ -95,9 +95,6 @@ bool getNodeDestination(const std::string &i_node, SmfndNodeDest *o_nodeDest, TRACE("Find destination for node '%s'", i_node.c_str()); - if (elapsedTime) // Initialize elapsedTime to zero. - *elapsedTime = 0; - /* It seems SaAmfNode objects can be stored, but the code * indicates that SaClmNode's are expected. 
Anyway an attempt * to go for it is probably faster that examining IMM classes @@ -133,10 +130,10 @@ bool getNodeDestination(const std::string &i_node, SmfndNodeDest *o_nodeDest, } struct timespec time = {2 * ONE_SECOND, 0}; osaf_nanosleep(&time); - timeout--; + timeout -= 2; if (elapsedTime) *elapsedTime = *elapsedTime + 2 * ONE_SECOND; if (maxWaitTime != -1) { - if (*elapsedTime >= maxWaitTime) { + if ((elapsedTime) && (*elapsedTime >= maxWaitTime)) { LOG_NO("Failed to get node dest for clm node %s", i_node.c_str()); return false; } @@ -165,11 +162,11 @@ bool getNodeDestination(const std::string &i_node, SmfndNodeDest *o_nodeDest, } struct timespec time = {2 * ONE_SECOND, 0}; osaf_nanosleep(&time); - timeout--; + timeout -= 2; if (elapsedTime) *elapsedTime = *elapsedTime + 2 * ONE_SECOND; if (maxWaitTime != -1) { - if (*elapsedTime >= maxWaitTime) { + if ((elapsedTime) && (*elapsedTime >= maxWaitTime)) { LOG_NO("Failed to get node dest for clm node %s", i_node.c_str()); free(nodeName); return false; -- 2.18.0 _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel