[
https://issues.apache.org/jira/browse/TRAFODION-2668?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16064112#comment-16064112
]
ASF GitHub Bot commented on TRAFODION-2668:
-------------------------------------------
Github user kevinxu021 commented on a diff in the pull request:
https://github.com/apache/incubator-trafodion/pull/1144#discussion_r124162364
--- Diff: dcs/src/main/java/org/trafodion/dcs/server/ServerManager.java ---
@@ -414,32 +431,40 @@ public Boolean call() throws Exception {
// finish
if (f != null) {
Integer result = f.get();
+ LOG.debug("Server handler [" + instance + ":" + result
+ "] finished");
int childInstance = result.intValue();
// get the node id
- boolean isRunning =
serverHandlers[childInstance-1].serverMonitor.isPidRunning();
- String nid =
serverHandlers[childInstance-1].serverMonitor.nid;
- String pid =
serverHandlers[childInstance-1].serverMonitor.pid;
- serverHandlers[childInstance-1] = null;
- LOG.debug("Server handler [" + instance + ":" + result
- + "] finished, restarting");
- if (isRunning)
- LOG.info("mxosrvr " + nid + "," + pid + " still
running");
- else
- LOG.info("mxosrvr " + nid + "," + pid + " exited,
restarting");
+ boolean isRunning = serverHandlers[childInstance -
1].serverMonitor.monitor();
+ String nid = serverHandlers[childInstance -
1].serverMonitor.nid;
+ String pid = serverHandlers[childInstance -
1].serverMonitor.pid;
+ int restartAttempts = serverHandlers[childInstance -
1].getRestartAttempts();
+
+ serverHandlers[childInstance - 1] = null;
retryCounter = retryCounterFactory.create();
while (!isTrafodionRunning(nid)) {
- if (!retryCounter.shouldRetry()) {
- throw new IOException("Node " + nid + " is not
Up");
- } else {
- retryCounter.sleepUntilNextRetry();
- retryCounter.useRetry();
- }
- }
- serverHandlers[childInstance-1] = new
ServerHandler(childInstance);
-
completionService.submit(serverHandlers[childInstance-1]);
+ if (!retryCounter.shouldRetry()) {
+ throw new IOException("Node " + nid + " is not
Up");
+ } else {
+ retryCounter.sleepUntilNextRetry();
+ retryCounter.useRetry();
+ }
+ }
+ if (isRunning) {
--- End diff --
Merge the logic with line 444
> one mxosrvr can't startup, dcsServer down
> -----------------------------------------
>
> Key: TRAFODION-2668
> URL: https://issues.apache.org/jira/browse/TRAFODION-2668
> Project: Apache Trafodion
> Issue Type: Bug
> Reporter: mashengchen
> Assignee: mashengchen
>
> If one of the mxosrvr processes has an error while dcsserver is starting,
> dcsserver will go down.
--
This message was sent by Atlassian JIRA
(v6.4.14#64029)