incubator-slider git commit: SLIDER-1233 Lost nodes should not contribute to container failures (fix a fun-test)

2017-10-11 Thread gourksaha
Repository: incubator-slider
Updated Branches:
  refs/heads/develop cc7a644ea -> 720bd19df


SLIDER-1233 Lost nodes should not contribute to container failures (fix a fun-test)


Project: http://git-wip-us.apache.org/repos/asf/incubator-slider/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-slider/commit/720bd19d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-slider/tree/720bd19d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-slider/diff/720bd19d

Branch: refs/heads/develop
Commit: 720bd19df9d2c0ad74cd536802a3c0028f7cfa96
Parents: cc7a644
Author: Gour Saha 
Authored: Wed Oct 11 17:25:23 2017 -0700
Committer: Gour Saha 
Committed: Wed Oct 11 17:25:23 2017 -0700

--
 .../org/apache/slider/funtest/lifecycle/AppsUpgradeIT.groovy  | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/720bd19d/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AppsUpgradeIT.groovy
--
diff --git a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AppsUpgradeIT.groovy b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AppsUpgradeIT.groovy
index e18a757..7e9ebf5 100644
--- a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AppsUpgradeIT.groovy
+++ b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AppsUpgradeIT.groovy
@@ -161,9 +161,10 @@ public class AppsUpgradeIT extends AgentCommandTestBase
 ])
 
 // verify
-describe("COMMAND_LOGGER container failed count should reach 1")
-expectFailedContainerCountReached(APPLICATION_NAME, COMMAND_LOGGER, 1,
-CONTAINER_LAUNCH_TIMEOUT)
+describe("COMMAND_LOGGER container failed count should remain 0, since "
+  + "container kills during upgrade is not counted as failures")
+def failedCount = queryFailedCount(APPLICATION_NAME, COMMAND_LOGGER)
+assert failedCount == 0
 describe("COMMAND_LOGGER container request count should reach 1")
 expectContainerRequestedCountReached(APPLICATION_NAME, COMMAND_LOGGER, 1,
 CONTAINER_LAUNCH_TIMEOUT)



incubator-slider git commit: SLIDER-1233 Lost nodes should not contribute to container failures

2017-07-25 Thread billie
Repository: incubator-slider
Updated Branches:
  refs/heads/develop 46396410b -> ef5954ded


SLIDER-1233 Lost nodes should not contribute to container failures


Project: http://git-wip-us.apache.org/repos/asf/incubator-slider/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-slider/commit/ef5954de
Tree: http://git-wip-us.apache.org/repos/asf/incubator-slider/tree/ef5954de
Diff: http://git-wip-us.apache.org/repos/asf/incubator-slider/diff/ef5954de

Branch: refs/heads/develop
Commit: ef5954dedf4f8503d2104987ffc061cb99e906f4
Parents: 4639641
Author: Billie Rinaldi 
Authored: Tue Jul 25 14:23:09 2017 -0700
Committer: Billie Rinaldi 
Committed: Tue Jul 25 14:23:09 2017 -0700

--
 .../server/appmaster/state/RoleStatus.java  |  4 +++
 .../TestMockAppStateContainerFailure.groovy | 35 
 2 files changed, 39 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/ef5954de/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleStatus.java
--
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleStatus.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleStatus.java
index 07a5cf9..694f5cf 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleStatus.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleStatus.java
@@ -289,6 +289,10 @@ public final class RoleStatus implements Cloneable, MetricSet {
   failedContainers.add(containerId);
 }
 switch (outcome) {
+  case Completed:
+// don't increment failure counts
+break;
+
   case Preempted:
 preempted.incrementAndGet();
 break;

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/ef5954de/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateContainerFailure.groovy
--
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateContainerFailure.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateContainerFailure.groovy
index f6314b0..87095ad 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateContainerFailure.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateContainerFailure.groovy
@@ -20,6 +20,7 @@ package org.apache.slider.server.appmaster.model.appstate
 
 import groovy.transform.CompileStatic
 import groovy.util.logging.Slf4j
+import org.apache.hadoop.yarn.api.records.ContainerExitStatus
 import org.apache.hadoop.yarn.api.records.ContainerId
 import org.apache.slider.api.ResourceKeys
 import org.apache.slider.core.conf.AggregateConf
@@ -216,6 +217,28 @@ class TestMockAppStateContainerFailure extends BaseMockAppStateTest
   }
 
   @Test
+  public void testRecurrentNodeLost() throws Throwable {
+role0Status.desired = 1
+try {
+  for (int i = 0; i < 100; i++) {
+List instances = createAndSubmitNodes()
+assert instances.size() == 1
+
+List ids = extractContainerIds(instances, 0)
+
+ContainerId cid = ids[0]
+log.info("$i instance $instances[0] $cid")
+assert cid
+AppState.NodeCompletionResult result = appState.onCompletedNode(containerStatus(cid, ContainerExitStatus.ABORTED))
+assert result.containerFailed
+  }
+} catch (TriggerClusterTeardownException teardown) {
+  log.info("Exception $teardown.exitCode : $teardown")
+  fail("Cluster failed despite aborted/killed container status")
+}
+  }
+
+  @Test
   public void testRoleStatusFailureWindow() throws Throwable {
 
 ResetFailureWindow resetter = new ResetFailureWindow(operationHandler);
@@ -308,6 +331,18 @@ class TestMockAppStateContainerFailure extends BaseMockAppStateTest
   }
 
   @Test
+  public void testRoleStatusCompleted() throws Throwable {
+def status = role0Status
+// aborted or killed
+status.noteFailed(false, "text", ContainerOutcome.Completed, null)
+assert 0 == status.failed
+assert 0L == status.failedRecently
+assert 0L == status.limitsExceeded
+assert 0L == status.preempted
+assert 0L == status.nodeFailed
+  }
+
+  @Test
   public void testNodeEntryCompleted() throws Throwable {
 NodeEntry nodeEntry = new NodeEntry(1)
 nodeEntry.containerCompleted(true, ContainerOutcome.Completed);