Repository: trafodion
Updated Branches:
  refs/heads/master 392c1ed9a -> c83471122
TRAFODION-2885 enhancement

Project: http://git-wip-us.apache.org/repos/asf/trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/trafodion/commit/5f0e2c29
Tree: http://git-wip-us.apache.org/repos/asf/trafodion/tree/5f0e2c29
Diff: http://git-wip-us.apache.org/repos/asf/trafodion/diff/5f0e2c29

Branch: refs/heads/master
Commit: 5f0e2c299891399698ee72e746433de03fd70df3
Parents: 4fe1e19
Author: aven <[email protected]>
Authored: Wed Jan 24 10:44:07 2018 +0800
Committer: aven <[email protected]>
Committed: Wed Jan 24 10:44:07 2018 +0800

----------------------------------------------------------------------
 dcs/bin/dcs-daemon.sh                               |  2 +-
 .../org/trafodion/dcs/master/ServerManager.java     | 16 ++++++++++++----
 2 files changed, 13 insertions(+), 5 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/trafodion/blob/5f0e2c29/dcs/bin/dcs-daemon.sh
----------------------------------------------------------------------
diff --git a/dcs/bin/dcs-daemon.sh b/dcs/bin/dcs-daemon.sh
index 530d356..a331c3e 100755
--- a/dcs/bin/dcs-daemon.sh
+++ b/dcs/bin/dcs-daemon.sh
@@ -145,7 +145,7 @@ case $startStop in
     if [ -f $pid ]; then
       if kill -0 `cat $pid` > /dev/null 2>&1; then
         echo $command `cat $pid`. Stop it first.
-        exit 1
+        exit -2
       fi
     fi

http://git-wip-us.apache.org/repos/asf/trafodion/blob/5f0e2c29/dcs/src/main/java/org/trafodion/dcs/master/ServerManager.java
----------------------------------------------------------------------
diff --git a/dcs/src/main/java/org/trafodion/dcs/master/ServerManager.java b/dcs/src/main/java/org/trafodion/dcs/master/ServerManager.java
index 37c7963..a92f25a 100644
--- a/dcs/src/main/java/org/trafodion/dcs/master/ServerManager.java
+++ b/dcs/src/main/java/org/trafodion/dcs/master/ServerManager.java
@@ -318,8 +318,12 @@ public class ServerManager implements Callable {
                 RestartHandler handler = restartQueue.poll();
                 Future<ScriptContext> runner = pool.submit(handler);
                 ScriptContext scriptContext = runner.get();// blocking call
-                if (scriptContext.getExitCode() != 0)
+                // In some situation, there may restart dcs server replicated.
+                // Exit code == -2 means dcs server had been started,
+                // no needs to add to restart queue.
+                if (scriptContext.getExitCode() != 0 && scriptContext.getExitCode() != -2) {
                     restartQueue.add(handler);
+                }
             }
             try {
@@ -501,7 +505,7 @@ public class ServerManager implements Callable {
     }
     private void getUnwathedServers() {
-        // In some situation, if DCS Server does not have znode info in zookeeper
+        // In some situation when open HA, if DCS Server does not have znode info in zookeeper
         // when DCS Master is starting, then server will never be watched by zookeeper,
         // and if it downs, it will never be restarted.
@@ -511,9 +515,13 @@ public class ServerManager implements Callable {
         // hostName + ":" + instance + ":" + infoPort + ":" + serverStartTimestamp
         // eg : gy26.esgyncn.local:3:24413:1515056285028
         // RestartHandler need to know hostName, instanceNum(lineNum), serverStartTimestamp(for if condition)
-        if (runningServers.size() == configuredServers.size()) {
+        if (!master.isFollower() || runningServers.size() == configuredServers.size()) {
             if (LOG.isDebugEnabled()) {
-                LOG.debug("all dcs servers have started, no need to add watchers");
+                if (!master.isFollower()) {
+                    LOG.debug("dcs master start normally, no need to add watchers");
+                } else {
+                    LOG.debug("backup master start, all dcs servers have started, no need to add watchers");
+                }
             }
             return;
         }
