GitHub user selvaganesang commented on a diff in the pull request: https://github.com/apache/incubator-trafodion/pull/1144#discussion_r124095950 --- Diff: dcs/src/main/java/org/trafodion/dcs/server/ServerManager.java --- @@ -414,32 +431,40 @@ public Boolean call() throws Exception { // finish if (f != null) { Integer result = f.get(); + LOG.debug("Server handler [" + instance + ":" + result + "] finished"); int childInstance = result.intValue(); // get the node id - boolean isRunning = serverHandlers[childInstance-1].serverMonitor.isPidRunning(); - String nid = serverHandlers[childInstance-1].serverMonitor.nid; - String pid = serverHandlers[childInstance-1].serverMonitor.pid; - serverHandlers[childInstance-1] = null; - LOG.debug("Server handler [" + instance + ":" + result - + "] finished, restarting"); - if (isRunning) - LOG.info("mxosrvr " + nid + "," + pid + " still running"); - else - LOG.info("mxosrvr " + nid + "," + pid + " exited, restarting"); + boolean isRunning = serverHandlers[childInstance - 1].serverMonitor.monitor(); + String nid = serverHandlers[childInstance - 1].serverMonitor.nid; + String pid = serverHandlers[childInstance - 1].serverMonitor.pid; + int restartAttempts = serverHandlers[childInstance - 1].getRestartAttempts(); + + serverHandlers[childInstance - 1] = null; retryCounter = retryCounterFactory.create(); while (!isTrafodionRunning(nid)) { - if (!retryCounter.shouldRetry()) { - throw new IOException("Node " + nid + " is not Up"); - } else { - retryCounter.sleepUntilNextRetry(); - retryCounter.useRetry(); - } - } - serverHandlers[childInstance-1] = new ServerHandler(childInstance); - completionService.submit(serverHandlers[childInstance-1]); + if (!retryCounter.shouldRetry()) { --- End diff -- I think you need to either fix retryCounterFactory to do the looping for the retry count, or get rid of retryCounter completely and use the restartAttempts variable. In the latter case, line 445 shouldn't be using the retryCounter.shouldRetry() method.
How was this change unit tested?
--- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and would like it, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---