Repository: incubator-slider
Updated Branches:
  refs/heads/develop 46396410b -> ef5954ded


SLIDER-1233 Lost nodes should not contribute to container failures


Project: http://git-wip-us.apache.org/repos/asf/incubator-slider/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-slider/commit/ef5954de
Tree: http://git-wip-us.apache.org/repos/asf/incubator-slider/tree/ef5954de
Diff: http://git-wip-us.apache.org/repos/asf/incubator-slider/diff/ef5954de

Branch: refs/heads/develop
Commit: ef5954dedf4f8503d2104987ffc061cb99e906f4
Parents: 4639641
Author: Billie Rinaldi <bil...@apache.org>
Authored: Tue Jul 25 14:23:09 2017 -0700
Committer: Billie Rinaldi <bil...@apache.org>
Committed: Tue Jul 25 14:23:09 2017 -0700

----------------------------------------------------------------------
 .../server/appmaster/state/RoleStatus.java      |  4 +++
 .../TestMockAppStateContainerFailure.groovy     | 35 ++++++++++++++++++++
 2 files changed, 39 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/ef5954de/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleStatus.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleStatus.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleStatus.java
index 07a5cf9..694f5cf 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleStatus.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleStatus.java
@@ -289,6 +289,10 @@ public final class RoleStatus implements Cloneable, MetricSet {
       failedContainers.add(containerId);
     }
     switch (outcome) {
+      case Completed:
+        // don't increment failure counts
+        break;
+
       case Preempted:
         preempted.incrementAndGet();
         break;

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/ef5954de/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateContainerFailure.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateContainerFailure.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateContainerFailure.groovy
index f6314b0..87095ad 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateContainerFailure.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateContainerFailure.groovy
@@ -20,6 +20,7 @@ package org.apache.slider.server.appmaster.model.appstate
 
 import groovy.transform.CompileStatic
 import groovy.util.logging.Slf4j
+import org.apache.hadoop.yarn.api.records.ContainerExitStatus
 import org.apache.hadoop.yarn.api.records.ContainerId
 import org.apache.slider.api.ResourceKeys
 import org.apache.slider.core.conf.AggregateConf
@@ -216,6 +217,28 @@ class TestMockAppStateContainerFailure extends BaseMockAppStateTest
   }
 
   @Test
+  public void testRecurrentNodeLost() throws Throwable {
+    role0Status.desired = 1
+    try {
+      for (int i = 0; i < 100; i++) {
+        List<RoleInstance> instances = createAndSubmitNodes()
+        assert instances.size() == 1
+
+        List<ContainerId> ids = extractContainerIds(instances, 0)
+
+        ContainerId cid = ids[0]
+        log.info("$i instance $instances[0] $cid")
+        assert cid
+        AppState.NodeCompletionResult result = appState.onCompletedNode(containerStatus(cid, ContainerExitStatus.ABORTED))
+        assert result.containerFailed
+      }
+    } catch (TriggerClusterTeardownException teardown) {
+      log.info("Exception $teardown.exitCode : $teardown")
+      fail("Cluster failed despite aborted/killed container status")
+    }
+  }
+
+  @Test
   public void testRoleStatusFailureWindow() throws Throwable {
 
     ResetFailureWindow resetter = new ResetFailureWindow(operationHandler);
@@ -308,6 +331,18 @@ class TestMockAppStateContainerFailure extends BaseMockAppStateTest
   }
 
   @Test
+  public void testRoleStatusCompleted() throws Throwable {
+    def status = role0Status
+    // aborted or killed
+    status.noteFailed(false, "text", ContainerOutcome.Completed, null)
+    assert 0 == status.failed
+    assert 0L == status.failedRecently
+    assert 0L == status.limitsExceeded
+    assert 0L == status.preempted
+    assert 0L == status.nodeFailed
+  }
+
+  @Test
   public void testNodeEntryCompleted() throws Throwable {
     NodeEntry nodeEntry = new NodeEntry(1)
     nodeEntry.containerCompleted(true, ContainerOutcome.Completed);

Reply via email to