Repository: incubator-slider
Updated Branches:
  refs/heads/develop 098507719 -> df964c370


SLIDER-1184 yarn.container.failure.threshold=0 is NOT honored (ctas582 via gourksaha)


Project: http://git-wip-us.apache.org/repos/asf/incubator-slider/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-slider/commit/df964c37
Tree: http://git-wip-us.apache.org/repos/asf/incubator-slider/tree/df964c37
Diff: http://git-wip-us.apache.org/repos/asf/incubator-slider/diff/df964c37

Branch: refs/heads/develop
Commit: df964c3700fc50279735c161e53655dcafe68a4d
Parents: 0985077
Author: Gour Saha <gourks...@apache.org>
Authored: Tue Jan 17 08:31:49 2017 -0800
Committer: Gour Saha <gourks...@apache.org>
Committed: Tue Jan 17 08:31:49 2017 -0800

----------------------------------------------------------------------
 .../slider/server/appmaster/state/AppState.java |  2 +-
 .../TestMockAppStateContainerFailure.groovy     | 29 ++++++++++++++++++++
 2 files changed, 30 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/df964c37/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
index 49e7b78..19980aa 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
@@ -1953,7 +1953,7 @@ public class AppState {
           role.getName(), failures, threshold);
     }
 
-    if (failures > threshold) {
+    if (threshold > 0 && failures > threshold) {
       throw new TriggerClusterTeardownException(
         SliderExitCodes.EXIT_DEPLOYMENT_FAILED,
           FinalApplicationStatus.FAILED, ErrorStrings.E_UNSTABLE_CLUSTER +

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/df964c37/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateContainerFailure.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateContainerFailure.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateContainerFailure.groovy
index 3235827..0eb5456 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateContainerFailure.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateContainerFailure.groovy
@@ -27,6 +27,7 @@ import org.apache.slider.core.exceptions.SliderException
 import org.apache.slider.core.exceptions.TriggerClusterTeardownException
 import org.apache.slider.server.appmaster.actions.ResetFailureWindow
 import org.apache.slider.server.appmaster.model.mock.BaseMockAppStateTest
+import org.apache.slider.server.appmaster.model.mock.MockAppState
 import org.apache.slider.server.appmaster.model.mock.MockRoles
 import org.apache.slider.server.appmaster.model.mock.MockYarnEngine
 import org.apache.slider.server.appmaster.state.AppState
@@ -181,6 +182,34 @@ class TestMockAppStateContainerFailure extends BaseMockAppStateTest
     }
   }
 
+  @Test
+  public void testRecurrentStartupFailureWithUnlimitedFailures() throws Throwable {
+    // Update instance definition to allow containers to fail any number of times
+    def bindingInfo = buildBindingInfo()
+    def globalResourceOptions = bindingInfo.instanceDefinition.resourceOperations.globalOptions
+    globalResourceOptions.put(ResourceKeys.CONTAINER_FAILURE_THRESHOLD, "0")
+    appState = new MockAppState(bindingInfo)
+
+    role0Status.desired = 1
+    try {
+      for (int i = 0; i < 100; i++) {
+        List<RoleInstance> instances = createAndSubmitNodes()
+        assert instances.size() == 1
+
+        List<ContainerId> ids = extractContainerIds(instances, 0)
+
+        ContainerId cid = ids[0]
+        log.info("$i instance $instances[0] $cid")
+        assert cid
+        appState.onNodeManagerContainerStartFailed(cid, new SliderException("failure #${i}"))
+        AppState.NodeCompletionResult result = appState.onCompletedNode(containerStatus(cid))
+        assert result.containerFailed
+      }
+    } catch (TriggerClusterTeardownException teardown) {
+      log.info("Exception $teardown.exitCode : $teardown")
+      fail("Cluster failed despite $ResourceKeys.CONTAINER_FAILURE_THRESHOLD = 0")
+    }
+  }
 
   @Test
   public void testRoleStatusFailureWindow() throws Throwable {

Reply via email to