Repository: incubator-slider Updated Branches: refs/heads/develop 098507719 -> df964c370
SLIDER-1184 yarn.container.failure.threshold=0 is NOT honored (ctas582 via gourksaha) Project: http://git-wip-us.apache.org/repos/asf/incubator-slider/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-slider/commit/df964c37 Tree: http://git-wip-us.apache.org/repos/asf/incubator-slider/tree/df964c37 Diff: http://git-wip-us.apache.org/repos/asf/incubator-slider/diff/df964c37 Branch: refs/heads/develop Commit: df964c3700fc50279735c161e53655dcafe68a4d Parents: 0985077 Author: Gour Saha <gourks...@apache.org> Authored: Tue Jan 17 08:31:49 2017 -0800 Committer: Gour Saha <gourks...@apache.org> Committed: Tue Jan 17 08:31:49 2017 -0800 ---------------------------------------------------------------------- .../slider/server/appmaster/state/AppState.java | 2 +- .../TestMockAppStateContainerFailure.groovy | 29 ++++++++++++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/df964c37/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java ---------------------------------------------------------------------- diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java index 49e7b78..19980aa 100644 --- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java +++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java @@ -1953,7 +1953,7 @@ public class AppState { role.getName(), failures, threshold); } - if (failures > threshold) { + if (threshold > 0 && failures > threshold) { throw new TriggerClusterTeardownException( SliderExitCodes.EXIT_DEPLOYMENT_FAILED, FinalApplicationStatus.FAILED, ErrorStrings.E_UNSTABLE_CLUSTER + http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/df964c37/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateContainerFailure.groovy ---------------------------------------------------------------------- diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateContainerFailure.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateContainerFailure.groovy index 3235827..0eb5456 100644 --- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateContainerFailure.groovy +++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateContainerFailure.groovy @@ -27,6 +27,7 @@ import org.apache.slider.core.exceptions.SliderException import org.apache.slider.core.exceptions.TriggerClusterTeardownException import org.apache.slider.server.appmaster.actions.ResetFailureWindow import org.apache.slider.server.appmaster.model.mock.BaseMockAppStateTest +import org.apache.slider.server.appmaster.model.mock.MockAppState import org.apache.slider.server.appmaster.model.mock.MockRoles import org.apache.slider.server.appmaster.model.mock.MockYarnEngine import org.apache.slider.server.appmaster.state.AppState @@ -181,6 +182,34 @@ class TestMockAppStateContainerFailure extends BaseMockAppStateTest } } + @Test + public void testRecurrentStartupFailureWithUnlimitedFailures() throws Throwable { + // Update instance definition to allow containers to fail any number of times + def bindingInfo = buildBindingInfo() + def globalResourceOptions = bindingInfo.instanceDefinition.resourceOperations.globalOptions + globalResourceOptions.put(ResourceKeys.CONTAINER_FAILURE_THRESHOLD, "0") + appState = new MockAppState(bindingInfo) + + role0Status.desired = 1 + try { + for (int i = 0; i < 100; i++) { + List<RoleInstance> instances = createAndSubmitNodes() + assert instances.size() == 1 + + List<ContainerId> ids = extractContainerIds(instances, 0) + + ContainerId cid = ids[0] + log.info("$i instance $instances[0] $cid") + assert cid + appState.onNodeManagerContainerStartFailed(cid, new SliderException("failure #${i}")) + AppState.NodeCompletionResult result = appState.onCompletedNode(containerStatus(cid)) + assert result.containerFailed + } + } catch (TriggerClusterTeardownException teardown) { + log.info("Exception $teardown.exitCode : $teardown") + fail("Cluster failed despite $ResourceKeys.CONTAINER_FAILURE_THRESHOLD = 0") + } + } @Test public void testRoleStatusFailureWindow() throws Throwable {