SLIDER-616 AMFailuresIT
Project: http://git-wip-us.apache.org/repos/asf/incubator-slider/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-slider/commit/dce4424e Tree: http://git-wip-us.apache.org/repos/asf/incubator-slider/tree/dce4424e Diff: http://git-wip-us.apache.org/repos/asf/incubator-slider/diff/dce4424e Branch: refs/heads/releases/slider-0.60 Commit: dce4424e183771c69b2e1239caffca6f8b692b22 Parents: 83c3e06 Author: Steve Loughran <[email protected]> Authored: Wed Nov 5 15:37:55 2014 +0000 Committer: Steve Loughran <[email protected]> Committed: Thu Nov 6 15:00:36 2014 +0000 ---------------------------------------------------------------------- slider-funtest/pom.xml | 4 +- .../framework/AgentCommandTestBase.groovy | 3 +- .../funtest/framework/CommandTestBase.groovy | 90 +++++++++++++++++--- .../funtest/lifecycle/AMFailuresIT.groovy | 37 +++----- 4 files changed, 96 insertions(+), 38 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/dce4424e/slider-funtest/pom.xml ---------------------------------------------------------------------- diff --git a/slider-funtest/pom.xml b/slider-funtest/pom.xml index 0a4c7ce..aa11f57 100644 --- a/slider-funtest/pom.xml +++ b/slider-funtest/pom.xml @@ -122,7 +122,7 @@ <test.app.pkg.name>CMD_LOGGER</test.app.pkg.name> <test.app.resource>../slider-core/src/test/app_packages/test_command_log/resources.json</test.app.resource> <test.app.template>../slider-core/src/test/app_packages/test_command_log/appConfig.json</test.app.template> - <vagrant.current.working.dir>/Users/gsaha/work/workspace/ambari-vagrant/centos6.4</vagrant.current.working.dir> + <vagrant.current.working.dir></vagrant.current.working.dir> </systemPropertyVariables> </configuration> </plugin> @@ -172,8 +172,6 @@ <reporting> <plugins> - - </plugins> </reporting> 
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/dce4424e/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/AgentCommandTestBase.groovy ---------------------------------------------------------------------- diff --git a/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/AgentCommandTestBase.groovy b/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/AgentCommandTestBase.groovy index 6e05056..668a264 100644 --- a/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/AgentCommandTestBase.groovy +++ b/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/AgentCommandTestBase.groovy @@ -67,7 +67,8 @@ implements FuntestProperties, Arguments, SliderExitCodes, SliderActions { AGENTTESTS_LABELS_RED_BLUE_DEFINED = SLIDER_CONFIG.getBoolean(KEY_AGENTTESTS_LABELS_RED_BLUE_DEFINED, false) AGENTTESTS_AM_FAILURES_ENABLED = - SLIDER_CONFIG.getBoolean(KEY_AGENTTESTS_AM_FAILURES_ENABLED, false) + SLIDER_CONFIG.getBoolean(KEY_AGENTTESTS_AM_FAILURES_ENABLED, + AGENTTESTS_ENABLED) } protected String getAppResource() { http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/dce4424e/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/CommandTestBase.groovy ---------------------------------------------------------------------- diff --git a/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/CommandTestBase.groovy b/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/CommandTestBase.groovy index 36515c5..7d369d5 100644 --- a/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/CommandTestBase.groovy +++ b/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/CommandTestBase.groovy @@ -775,12 +775,19 @@ abstract class CommandTestBase extends SliderTestUtils { ensureApplicationIsUp(cluster) return sliderClient.clusterDescription } - public ClusterDescription killAmAndWaitForRestart( - SliderClient sliderClient, String cluster, 
String appId) { - assert cluster + /** + * Kill an AM and await restart + * @param sliderClient + * @param application + * @param appId + * @return + */ + public void killAmAndWaitForRestart(String application, String appId) { + + assert application slider(0, [ - ACTION_AM_SUICIDE, cluster, + ACTION_AM_SUICIDE, application, ARG_EXITCODE, "1", ARG_WAIT, "1000", ARG_MESSAGE, "suicide" @@ -788,9 +795,12 @@ abstract class CommandTestBase extends SliderTestUtils { sleep(5000) ensureYarnApplicationIsUp(appId) - return sliderClient.clusterDescription } + /** + * Spinning operation to perform a registry call + * @param application application + */ protected void ensureRegistryCallSucceeds(String application) { repeatUntilSuccess(this.&isRegistryAccessible, REGISTRY_STARTUP_TIMEOUT, @@ -808,7 +818,10 @@ abstract class CommandTestBase extends SliderTestUtils { } } - + /** + * wait for an application to come up + * @param application + */ protected void ensureApplicationIsUp(String application) { repeatUntilSuccess(this.&isApplicationRunning, SLIDER_CONFIG.getInt(KEY_TEST_INSTANCE_LAUNCH_TIME, @@ -822,6 +835,11 @@ abstract class CommandTestBase extends SliderTestUtils { } } + /** + * Is the registry accessible for an application? 
+ * @param args argument map containing <code>"application"</code> + * @return probe outcome + */ protected Outcome isRegistryAccessible(Map<String, String> args) { String applicationName = args['application']; SliderShell shell = slider( @@ -837,11 +855,21 @@ abstract class CommandTestBase extends SliderTestUtils { return Outcome.fromBool(EXIT_SUCCESS == shell.execute()) } + /** + * Probe for an application running; uses <code>exists</code> operation + * @param args argument map containing <code>"application"</code> + * @return + */ protected Outcome isApplicationRunning(Map<String, String> args) { String applicationName = args['application']; return Outcome.fromBool(isApplicationUp(applicationName)) } + /** + * Use <code>exists</code> operation to probe for an application being up + * @param applicationName app name + * @return true if it is running + */ protected boolean isApplicationUp(String applicationName) { return isApplicationInState( applicationName, @@ -850,7 +878,8 @@ abstract class CommandTestBase extends SliderTestUtils { } /** - * is an application in a desired yarn state + * is an application in a desired yarn state. 
Uses the <code>exists</code> + * CLI operation * @param yarnState * @param applicationName * @return @@ -863,6 +892,11 @@ abstract class CommandTestBase extends SliderTestUtils { return shell.ret == 0 } + /** + * Probe callback for whether the app is running or not + * @param args map where 'applicationId' must be set + * @return + */ protected Outcome isYarnApplicationRunning(Map<String, String> args) { String applicationId = args['applicationId']; @@ -878,12 +912,25 @@ abstract class CommandTestBase extends SliderTestUtils { */ public static Outcome isYarnApplicationRunning( String applicationId) { - YarnApplicationState appState = lookupYarnAppState(applicationId) YarnApplicationState yarnState = YarnApplicationState.RUNNING + return isYarnApplicationInState(applicationId, yarnState) + } + + /** + * Probe for a YARN application being in a given state + * @param applicationId app id + * @param yarnState desired state + * @return success for a match, retry if state below desired, and fail if + * above it + */ + public static Outcome isYarnApplicationInState( + String applicationId, + YarnApplicationState yarnState) { + YarnApplicationState appState = lookupYarnAppState(applicationId) if (yarnState == appState) { return Outcome.Success; } - + if (appState.ordinal() > yarnState.ordinal()) { // app has passed beyond hope return Outcome.Fail @@ -891,6 +938,11 @@ abstract class CommandTestBase extends SliderTestUtils { return Outcome.Retry } + /** + * Look up the YARN application by ID, get its application record + * @param applicationId the application ID + * @return the application state + */ public static YarnApplicationState lookupYarnAppState(String applicationId) { def sar = lookupApplication(applicationId) assert sar != null; @@ -898,6 +950,11 @@ abstract class CommandTestBase extends SliderTestUtils { return appState } + /** + * Assert an application is in a given state; fail if not + * @param applicationId appId + * @param expectedState expected state + */ public static 
void assertInYarnState(String applicationId, YarnApplicationState expectedState) { def applicationReport = lookupApplication(applicationId) @@ -914,14 +971,14 @@ abstract class CommandTestBase extends SliderTestUtils { ensureYarnApplicationIsUp(id) return id; } + /** * Wait for the YARN app to come up. This will fail fast * @param applicationId */ protected void ensureYarnApplicationIsUp(String applicationId) { repeatUntilSuccess(this.&isYarnApplicationRunning, - SLIDER_CONFIG.getInt(KEY_TEST_INSTANCE_LAUNCH_TIME, - DEFAULT_INSTANCE_LAUNCH_TIME_SECONDS), + instanceLaunchTime, PROBE_SLEEP_TIME, [applicationId: applicationId], true, @@ -935,6 +992,17 @@ abstract class CommandTestBase extends SliderTestUtils { } } + /** + * Get the expected launch time. Default is the configuration option + * {@link FuntestProperties#KEY_TEST_INSTANCE_LAUNCH_TIME} and + * default value {@link FuntestProperties#DEFAULT_INSTANCE_LAUNCH_TIME_SECONDS} + * @return + */ + public int getInstanceLaunchTime() { + return SLIDER_CONFIG.getInt(KEY_TEST_INSTANCE_LAUNCH_TIME, + DEFAULT_INSTANCE_LAUNCH_TIME_SECONDS) + } + public String getInfoAmWebUrl(String applicationName) { ClusterDescription cd = execStatus(applicationName); String urlString = cd.getInfo("info.am.web.url"); http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/dce4424e/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AMFailuresIT.groovy ---------------------------------------------------------------------- diff --git a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AMFailuresIT.groovy b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AMFailuresIT.groovy index a78ae87..6238164 100644 --- a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AMFailuresIT.groovy +++ b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AMFailuresIT.groovy @@ -72,9 +72,6 @@ implements FuntestProperties, Arguments, SliderExitCodes, SliderActions { def appId = 
ensureYarnApplicationIsUp(launchReportFile) expectContainerRequestedCountReached(APPLICATION_NAME, COMMAND_LOGGER, 1, CONTAINER_LAUNCH_TIMEOUT) - - // Wait for 20 secs for AM and agent to both reach STARTED state - sleep(1000 * 20) def cd = assertContainersLive(APPLICATION_NAME, COMMAND_LOGGER, 1) def loggerInstances = cd.instances[COMMAND_LOGGER] @@ -82,37 +79,32 @@ implements FuntestProperties, Arguments, SliderExitCodes, SliderActions { def loggerStats = cd.statistics[COMMAND_LOGGER] - def origRequested = loggerStats["containers.requested"] - assert origRequested >= 2 + assert loggerStats["containers.requested"] == 1 assert loggerStats["containers.live"] == 1 - assert isApplicationUp(APPLICATION_NAME), 'App is not running.' - assertSuccess(shell) - // Now kill the AM log.info("Killing AM now ...") // killAMUsingJsch() -// killAMUsingAmSuicide() - killAMUsingVagrantShell() - - // Check that the application is not running (and is in ACCEPTED state) - assert lookupYarnAppState(appId) == YarnApplicationState.ACCEPTED , - 'App should be in ACCEPTED state (since AM got killed)' - log.info("After AM KILL: application {} is in ACCEPTED state", APPLICATION_NAME) - - // Wait until AM comes back up and verify container count again - ensureYarnApplicationIsUp(appId) + killAmAndWaitForRestart(APPLICATION_NAME, appId) // There should be exactly 1 live logger container def cd2 = assertContainersLive(APPLICATION_NAME, COMMAND_LOGGER, 1) // No new containers should be requested for the agents def loggerStats2 = cd2.statistics[COMMAND_LOGGER] - assert origRequested == loggerStats2["containers.requested"], + assert loggerStats["containers.requested"] == loggerStats2["containers.requested"], 'No new agent containers should be requested' assert lookupYarnAppState(appId) == YarnApplicationState.RUNNING } + /** + * Allow for 2x as long as other test instances, as for AM restart we + * need to allow for a longer delay + */ + @Override + int getInstanceLaunchTime() { + return 2* 
super.instanceLaunchTime + } protected void killAMUsingAmSuicide() { SliderShell shell = slider(EXIT_SUCCESS, @@ -121,12 +113,11 @@ implements FuntestProperties, Arguments, SliderExitCodes, SliderActions { ARG_MESSAGE, "testAMRestart", APPLICATION_NAME]) logShell(shell) - assertSuccess(shell) } protected void killAMUsingVagrantShell() { String hostname = SLIDER_CONFIG.get(YarnConfiguration.RM_ADDRESS).split(":")[0] - assert hostname != null && !hostname.isEmpty() + assert hostname String vagrantVmName = hostname.split("\\.")[0] String vagrantCwd = sysprop(VAGRANT_CWD) @@ -146,8 +137,8 @@ implements FuntestProperties, Arguments, SliderExitCodes, SliderActions { protected void killAMUsingJsch() { String hostname = SLIDER_CONFIG.get(YarnConfiguration.RM_ADDRESS).split(":")[0] String user = UserGroupInformation.currentUser - assert hostname != null && !hostname.isEmpty() - assert user != null && !user.isEmpty() + assert hostname + assert user bindSSHKey() RemoteServer remoteServer = new RemoteServer(
