Repository: tez
Updated Branches:
refs/heads/branch-0.5 de87d72c2 -> 7600f3861
TEZ-2398. Flaky test: TestFaultTolerance (bikas)
(cherry picked from commit 406721ab17b58e29e5bf3585d556700c2ef04f05)
Conflicts:
CHANGES.txt
(cherry picked from commit 7dac26e75f094f0486d4ce4390885dc468703799)
Conflicts:
CHANGES.txt
(cherry picked from commit d47cdb99d5bd4dc0fb8d8efe3dd9b9ba1f2bb2cf)
Conflicts:
tez-tests/src/test/java/org/apache/tez/test/TestInput.java
Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/7600f386
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/7600f386
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/7600f386
Branch: refs/heads/branch-0.5
Commit: 7600f386111f432c771f2cb03bd6b072053f574f
Parents: de87d72
Author: Bikas Saha <[email protected]>
Authored: Fri Sep 25 10:30:53 2015 -0700
Committer: Bikas Saha <[email protected]>
Committed: Fri Sep 25 10:54:00 2015 -0700
----------------------------------------------------------------------
CHANGES.txt | 1 +
.../java/org/apache/tez/test/TestFaultTolerance.java | 5 +----
.../src/test/java/org/apache/tez/test/TestInput.java | 13 +++++++++----
3 files changed, 11 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tez/blob/7600f386/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index e5d19b2..3a46867 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -7,6 +7,7 @@ INCOMPATIBLE CHANGES
TEZ-2552. CRC errors can cause job to run for very long time in large jobs.
ALL CHANGES:
+ TEZ-2398. Flaky test: TestFaultTolerance
TEZ-2808. Race condition between preemption and container assignment
TEZ-1929. pre-empted tasks should be marked as killed instead of failed
TEZ-1773. Add attempt failure cause enum to the attempt failed/killed
http://git-wip-us.apache.org/repos/asf/tez/blob/7600f386/tez-tests/src/test/java/org/apache/tez/test/TestFaultTolerance.java
----------------------------------------------------------------------
diff --git a/tez-tests/src/test/java/org/apache/tez/test/TestFaultTolerance.java b/tez-tests/src/test/java/org/apache/tez/test/TestFaultTolerance.java
index 11ce4bc..ad7b16e 100644
--- a/tez-tests/src/test/java/org/apache/tez/test/TestFaultTolerance.java
+++ b/tez-tests/src/test/java/org/apache/tez/test/TestFaultTolerance.java
@@ -74,7 +74,7 @@ public class TestFaultTolerance {
}
if (miniTezCluster == null) {
miniTezCluster = new MiniTezCluster(TestFaultTolerance.class.getName(),
- 4, 1, 1);
+ 3, 1, 1);
Configuration miniTezconf = new Configuration(conf);
miniTezconf.set("fs.defaultFS", remoteFs.getUri().toString()); // use HDFS
miniTezCluster.init(miniTezconf);
@@ -242,9 +242,6 @@ public class TestFaultTolerance {
TestProcessor.TEZ_FAILING_PROCESSOR_VERIFY_TASK_INDEX, "v2"),
"0,1");
testConf.setInt(TestProcessor.getVertexConfName(
TestProcessor.TEZ_FAILING_PROCESSOR_VERIFY_VALUE, "v2", 1), 5);
- //v2 task0 attempt 0 succeeds instantly.
- testConf.setInt(TestProcessor.getVertexConfName(
- TestProcessor.TEZ_FAILING_PROCESSOR_VERIFY_VALUE, "v2", 0), 3);
DAG dag = SimpleTestDAG.createDAG("testBasicInputFailureWithExit",
testConf);
runDAGAndVerify(dag, DAGStatus.State.SUCCEEDED);
http://git-wip-us.apache.org/repos/asf/tez/blob/7600f386/tez-tests/src/test/java/org/apache/tez/test/TestInput.java
----------------------------------------------------------------------
diff --git a/tez-tests/src/test/java/org/apache/tez/test/TestInput.java b/tez-tests/src/test/java/org/apache/tez/test/TestInput.java
index 8498acb..d1e4cf8 100644
--- a/tez-tests/src/test/java/org/apache/tez/test/TestInput.java
+++ b/tez-tests/src/test/java/org/apache/tez/test/TestInput.java
@@ -71,7 +71,6 @@ public class TestInput extends AbstractLogicalInput {
Set<Integer> failingInputIndices = Sets.newHashSet();
Integer failAll = new Integer(-1);
int[] inputValues;
- AtomicInteger numEventsReceived = new AtomicInteger(0);
/**
* Enable failure for this logical input
@@ -177,7 +176,6 @@ public class TestInput extends AbstractLogicalInput {
LOG.info("Failing input: " + msg);
}
}
- int numEvents = numEventsReceived.get();
getContext().sendEvents(events);
if (doFailAndExit) {
String msg = "FailingInput exiting: " +
getContext().getUniqueIdentifier();
@@ -185,7 +183,15 @@ public class TestInput extends AbstractLogicalInput {
throwException(msg);
} else {
try {
- while (numEvents == numEventsReceived.get()) {
+ // keep sending input read error until we receive the new input
+ // this check breaks the loop when we see a new input version
+ // thus, when multiple input versions arrive, this methods gets triggered
+ // for each version via wait-notify. But all events may have been processed in
+ // handleEvents() before the code reaches this point. Having this loop, makes
+ // it quickly exit for an older version if a newer version has been seen.
+ // however, if a newer version is not seen then it keeps sending input error
+ // indefinitely, by design.
+ while (lastInputReadyValue == inputReady.get()) {
// keep sending events
Thread.sleep(500);
getContext().sendEvents(events);
@@ -292,7 +298,6 @@ public class TestInput extends AbstractLogicalInput {
@Override
public void handleEvents(List<Event> inputEvents) throws Exception {
for (Event event : inputEvents) {
- numEventsReceived.incrementAndGet();
if (event instanceof DataMovementEvent) {
DataMovementEvent dmEvent = (DataMovementEvent) event;
numCompletedInputs++;