Author: edwardyoon
Date: Thu Nov 10 11:01:50 2011
New Revision: 1200267
URL: http://svn.apache.org/viewvc?rev=1200267&view=rev
Log:
The task should be killed if it fails to initialize
Modified:
incubator/hama/trunk/CHANGES.txt
incubator/hama/trunk/core/src/main/java/org/apache/hama/bsp/GroomServer.java
Modified: incubator/hama/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/incubator/hama/trunk/CHANGES.txt?rev=1200267&r1=1200266&r2=1200267&view=diff
==============================================================================
--- incubator/hama/trunk/CHANGES.txt (original)
+++ incubator/hama/trunk/CHANGES.txt Thu Nov 10 11:01:50 2011
@@ -15,6 +15,7 @@ Release 0.4 - Unreleased
BUG FIXES
+ HAMA-472: The task should be killed if it fails to initialize (edwardyoon)
HAMA-465: LocalJobRunner should support combiners and IO (tjungblut)
HAMA-459: GroomServerStatus.countTask() always returns 1 (edwardyoon)
HAMA-432: Add statusUpdate() method to BSPPeerProtocol (edwardyoon)
Modified: incubator/hama/trunk/core/src/main/java/org/apache/hama/bsp/GroomServer.java
URL: http://svn.apache.org/viewvc/incubator/hama/trunk/core/src/main/java/org/apache/hama/bsp/GroomServer.java?rev=1200267&r1=1200266&r2=1200267&view=diff
==============================================================================
--- incubator/hama/trunk/core/src/main/java/org/apache/hama/bsp/GroomServer.java (original)
+++ incubator/hama/trunk/core/src/main/java/org/apache/hama/bsp/GroomServer.java Thu Nov 10 11:01:50 2011
@@ -230,8 +230,8 @@ public class GroomServer implements Runn
// this.localDirAllocator = new LocalDirAllocator("bsp.local.dir");
try {
- zk = new ZooKeeper(QuorumPeer.getZKQuorumServersString(conf),
- conf.getInt(Constants.ZOOKEEPER_SESSION_TIMEOUT, 1200000), this);
+ zk = new ZooKeeper(QuorumPeer.getZKQuorumServersString(conf), conf
+ .getInt(Constants.ZOOKEEPER_SESSION_TIMEOUT, 1200000), this);
} catch (IOException e) {
LOG.error("Exception during reinitialization!", e);
}
@@ -243,9 +243,8 @@ public class GroomServer implements Runn
}
if (localHostname == null) {
- this.localHostname = DNS.getDefaultHost(
- conf.get("bsp.dns.interface", "default"),
- conf.get("bsp.dns.nameserver", "default"));
+ this.localHostname = DNS.getDefaultHost(conf.get("bsp.dns.interface",
+ "default"), conf.get("bsp.dns.nameserver", "default"));
}
// check local disk
checkLocalDirs(conf.getStrings("bsp.local.dir"));
@@ -474,6 +473,13 @@ public class GroomServer implements Runn
String msg = ("Error initializing " + tip.getTask().getTaskID() + ":\n"
+ StringUtils
.stringifyException(e));
LOG.warn(msg);
+
+ try {
+ tip.killAndCleanup(true);
+ } catch (IOException ie2) {
+ LOG.info("Error cleaning up " + tip.getTask().getTaskID() + ":\n"
+ + StringUtils.stringifyException(ie2));
+ }
}
}
@@ -733,7 +739,7 @@ public class GroomServer implements Runn
this.jobConf = jobConf;
this.localJobConf = null;
this.taskStatus = new TaskStatus(task.getJobID(), task.getTaskID(), 0,
- TaskStatus.State.UNASSIGNED, "running", groomServer,
+ TaskStatus.State.UNASSIGNED, "init", groomServer,
TaskStatus.Phase.STARTING);
}
@@ -776,11 +782,23 @@ public class GroomServer implements Runn
}
/**
- * This task has run on too long, and should be killed.
+ * Something went wrong and the task must be killed.
*/
public synchronized void killAndCleanup(boolean wasFailure)
throws IOException {
- runner.killBsp();
+ if (wasFailure) {
+ failures += 1;
+ taskStatus.setRunState(TaskStatus.State.FAILED);
+ } else {
+ taskStatus.setRunState(TaskStatus.State.KILLED);
+ }
+
+ if (taskStatus.getRunState() == TaskStatus.State.RUNNING) {
+ // runner could be null if task-cleanup attempt is not localized yet
+ if (runner != null) {
+ runner.killBsp();
+ }
+ }
}
/**