Repository: incubator-slider
Updated Branches:
refs/heads/develop 46396410b -> ef5954ded
SLIDER-1233 Lost nodes should not contribute to container failures
Project: http://git-wip-us.apache.org/repos/asf/incubator-slider/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-slider/commit/ef5954de
Tree: http://git-wip-us.apache.org/repos/asf/incubator-slider/tree/ef5954de
Diff: http://git-wip-us.apache.org/repos/asf/incubator-slider/diff/ef5954de
Branch: refs/heads/develop
Commit: ef5954dedf4f8503d2104987ffc061cb99e906f4
Parents: 4639641
Author: Billie Rinaldi
Authored: Tue Jul 25 14:23:09 2017 -0700
Committer: Billie Rinaldi
Committed: Tue Jul 25 14:23:09 2017 -0700
--
.../server/appmaster/state/RoleStatus.java | 4 +++
.../TestMockAppStateContainerFailure.groovy | 35
2 files changed, 39 insertions(+)
--
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/ef5954de/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleStatus.java
--
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleStatus.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleStatus.java
index 07a5cf9..694f5cf 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleStatus.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleStatus.java
@@ -289,6 +289,10 @@ public final class RoleStatus implements Cloneable, MetricSet {
failedContainers.add(containerId);
}
switch (outcome) {
+ case Completed:
+// don't increment failure counts
+break;
+
case Preempted:
preempted.incrementAndGet();
break;
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/ef5954de/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateContainerFailure.groovy
--
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateContainerFailure.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateContainerFailure.groovy
index f6314b0..87095ad 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateContainerFailure.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateContainerFailure.groovy
@@ -20,6 +20,7 @@ package org.apache.slider.server.appmaster.model.appstate
import groovy.transform.CompileStatic
import groovy.util.logging.Slf4j
+import org.apache.hadoop.yarn.api.records.ContainerExitStatus
import org.apache.hadoop.yarn.api.records.ContainerId
import org.apache.slider.api.ResourceKeys
import org.apache.slider.core.conf.AggregateConf
@@ -216,6 +217,28 @@ class TestMockAppStateContainerFailure extends BaseMockAppStateTest
}
@Test
+ public void testRecurrentNodeLost() throws Throwable {
+role0Status.desired = 1
+try {
+ for (int i = 0; i < 100; i++) {
+List instances = createAndSubmitNodes()
+assert instances.size() == 1
+
+List ids = extractContainerIds(instances, 0)
+
+ContainerId cid = ids[0]
+log.info("$i instance $instances[0] $cid")
+assert cid
+AppState.NodeCompletionResult result = appState.onCompletedNode(containerStatus(cid, ContainerExitStatus.ABORTED))
+assert result.containerFailed
+ }
+} catch (TriggerClusterTeardownException teardown) {
+ log.info("Exception $teardown.exitCode : $teardown")
+ fail("Cluster failed despite aborted/killed container status")
+}
+ }
+
+ @Test
public void testRoleStatusFailureWindow() throws Throwable {
ResetFailureWindow resetter = new ResetFailureWindow(operationHandler);
@@ -308,6 +331,18 @@ class TestMockAppStateContainerFailure extends BaseMockAppStateTest
}
@Test
+ public void testRoleStatusCompleted() throws Throwable {
+def status = role0Status
+// aborted or killed
+status.noteFailed(false, "text", ContainerOutcome.Completed, null)
+assert 0 == status.failed
+assert 0L == status.failedRecently
+assert 0L == status.limitsExceeded
+assert 0L == status.preempted
+assert 0L == status.nodeFailed
+ }
+
+ @Test
public void testNodeEntryCompleted() throws Throwable {
NodeEntry nodeEntry = new NodeEntry(1)
nodeEntry.containerCompleted(true, ContainerOutcome.Completed);