Repository: incubator-slider Updated Branches: refs/heads/develop 5c6b0f10b -> 68e2ef9e8
SLIDER-787 App Upgrade/Reconfig support in Slider (fun tests) Project: http://git-wip-us.apache.org/repos/asf/incubator-slider/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-slider/commit/68e2ef9e Tree: http://git-wip-us.apache.org/repos/asf/incubator-slider/tree/68e2ef9e Diff: http://git-wip-us.apache.org/repos/asf/incubator-slider/diff/68e2ef9e Branch: refs/heads/develop Commit: 68e2ef9e8a5d55937f34f488eabee50223d307ec Parents: 5c6b0f1 Author: Gour Saha <[email protected]> Authored: Wed Apr 22 12:54:54 2015 -0700 Committer: Gour Saha <[email protected]> Committed: Wed Apr 22 12:56:29 2015 -0700 ---------------------------------------------------------------------- .../slider-pkg/package/scripts/cl.py | 10 + .../funtest/framework/CommandTestBase.groovy | 109 ++++++++++- .../funtest/lifecycle/AppsUpgradeIT.groovy | 182 +++++++++++++++++++ 3 files changed, 295 insertions(+), 6 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/68e2ef9e/app-packages/command-logger/slider-pkg/package/scripts/cl.py ---------------------------------------------------------------------- diff --git a/app-packages/command-logger/slider-pkg/package/scripts/cl.py b/app-packages/command-logger/slider-pkg/package/scripts/cl.py index b15bbfd..5145fbb 100644 --- a/app-packages/command-logger/slider-pkg/package/scripts/cl.py +++ b/app-packages/command-logger/slider-pkg/package/scripts/cl.py @@ -84,5 +84,15 @@ class CommandLogger(Script): template_tag = None ) + def pre_upgrade(self, env): + import params + env.set_params(params) + Logger.info("Pre upgrade checks.") + + def post_upgrade(self, env): + import params + env.set_params(params) + Logger.info("Post upgrade checks.") + if __name__ == "__main__": CommandLogger().execute() http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/68e2ef9e/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/CommandTestBase.groovy 
---------------------------------------------------------------------- diff --git a/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/CommandTestBase.groovy b/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/CommandTestBase.groovy index 1c94eae..76853d0 100644 --- a/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/CommandTestBase.groovy +++ b/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/CommandTestBase.groovy @@ -956,6 +956,32 @@ abstract class CommandTestBase extends SliderTestUtils { } /** + * Wait for an application to move out of a specific state. Don't fail this + * test if the application is never found to move out of the state. State + * transitions sometimes happen so fast that short lived transient states + * might not be caught during probes. Hence just exit success, after timeout. + * + * @param application + * @param yarnState + */ + protected void ensureApplicationNotInState(String application, + YarnApplicationState yarnState) { + repeatUntilSuccess("await application to be not in state " + yarnState, + this.&isApplicationNotInState, + 10000, + 0, + [ + application: application, + yarnState: yarnState + ], + false, + "") { + describe "final state of app for not in state check" + exists(application).dumpOutput() + } + } + + /** * Is the registry accessible for an application? 
* @param args argument map containing <code>"application"</code> * @return probe outcome @@ -998,6 +1024,20 @@ abstract class CommandTestBase extends SliderTestUtils { } /** + * Probe for an application to be in a state other than the specified state; + * uses <code>exists</code> operation + * @param args argument map containing <code>"application"</code> and + * <code>state</code> + * @return + */ + protected Outcome isApplicationNotInState(Map<String, String> args) { + String applicationName = args['application']; + YarnApplicationState yarnState = YarnApplicationState + .valueOf(args['yarnState']); + return Outcome.fromBool(!isApplicationInState(applicationName, yarnState)) + } + + /** * is an application in a desired yarn state. Uses the <code>exists</code> * CLI operation * @param yarnState @@ -1219,7 +1259,7 @@ abstract class CommandTestBase extends SliderTestUtils { int container_launch_timeout) { repeatUntilSuccess( - "await container count", + "await requested container count", this.&hasRequestedContainerCountReached, container_launch_timeout, PROBE_SLEEP_TIME, @@ -1227,11 +1267,11 @@ abstract class CommandTestBase extends SliderTestUtils { role : role, application: application], true, - "countainer count not reached") { + "requested countainer count not reached") { int requestedCount = queryRequestedCount(application, role) - def message = "expected count of $role = $limit not reached: $requestedCount" + - " after $container_launch_timeout mS" + def message = "expected request count of $role = $limit not reached, " + + "actual: $requestedCount after $container_launch_timeout ms" describe message ClusterDescription cd = execStatus(application); log.info("Parsed status \n$cd") @@ -1288,12 +1328,69 @@ abstract class CommandTestBase extends SliderTestUtils { component : component, application: application], true, - "countainer count not reached") { - describe "container count not reached" + "live countainer count not reached") { + describe "live container 
count not reached" assertContainersLive(application, component, expected) } } + public int queryFailedCount(String application, String role) { + ClusterDescription cd = execStatus(application) + if (cd.statistics.size() == 0) { + log.debug("No statistics entries") + } + if (!cd.statistics[role]) { + log.debug("No stats for role $role") + return 0; + } + def statsForRole = cd.statistics[role] + + def failed = statsForRole[StatusKeys.STATISTICS_CONTAINERS_FAILED] + assert null != failed + return failed + } + + /** + * Probe: has the failed container count of a specific role been reached? + * @param args map with: "application", "role", "limit" + * @return success on a match, retry if not + */ + Outcome hasFailedContainerCountReached(Map<String, String> args) { + String application = args['application'] + String role = args['role'] + int expectedCount = args['limit'].toInteger(); + + int failedCount = queryFailedCount(application, role) + log.debug("failed $role count = $failedCount; expected=$expectedCount") + return Outcome.fromBool(failedCount >= expectedCount) + } + + /** + * Wait for the failed container count to be reached + * @param application application name + * @param component component name + * @param expected expected count + * @param container_launch_timeout launch timeout + */ + void expectFailedContainerCountReached( + String application, + String component, + int expected, + int container_launch_timeout) { + repeatUntilSuccess( + "await failed container count", + this.&hasFailedContainerCountReached, + container_launch_timeout, + PROBE_SLEEP_TIME, + [limit : Integer.toString(expected), + role : component, + application: application], + true, + "failed countainer count not reached") { + describe "failed container count not reached" + } + } + /** * Spin for <code>REGISTRY_STARTUP_TIMEOUT</code> waiting * for the output of the registry command to contain the specified 
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/68e2ef9e/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AppsUpgradeIT.groovy ---------------------------------------------------------------------- diff --git a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AppsUpgradeIT.groovy b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AppsUpgradeIT.groovy new file mode 100644 index 0000000..682a1a0 --- /dev/null +++ b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AppsUpgradeIT.groovy @@ -0,0 +1,182 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.slider.funtest.lifecycle + +import groovy.transform.CompileStatic +import groovy.util.logging.Slf4j +import org.apache.hadoop.yarn.api.records.YarnApplicationState +import org.apache.slider.api.StatusKeys +import org.apache.slider.common.SliderExitCodes +import org.apache.slider.common.params.Arguments +import org.apache.slider.common.params.SliderActions +import org.apache.slider.funtest.framework.AgentCommandTestBase +import org.apache.slider.funtest.framework.FuntestProperties +import org.apache.slider.funtest.framework.SliderShell +import org.junit.After +import org.junit.Test + +/** + * These are the steps required for this Rolling Upgrade (RU) test - + * - Install an app package + * - Deploy/create an app instance + * - Install a new version of app package + * currently same ver is used, needs to be upgraded once a strategy to check + * application version can be found. Until then using same version is fine. + * - Run the Slider App RU runbook steps (- and + scenarios) + * - Verify the expected statuses and container counts + * - Note: This is a lengthy test (takes approx 3-4 mins). RU tests contain + * multiple steps and validations and are expected to take that long. 
+ */ +@CompileStatic +@Slf4j +public class AppsUpgradeIT extends AgentCommandTestBase + implements FuntestProperties, Arguments, SliderExitCodes, SliderActions { + private static String COMMAND_LOGGER = "COMMAND_LOGGER" + private static String APPLICATION_NAME = "app-upgrade-happy-path" + private static String APP_RESOURCE = + "../slider-core/src/test/app_packages/test_command_log/resources.json" + + @After + public void destroyCluster() { + cleanup(APPLICATION_NAME) + } + + @Test + public void testUpgrade() throws Throwable { + assumeAgentTestsEnabled() + + cleanup(APPLICATION_NAME) + File launchReportFile = createTempJsonFile(); + SliderShell shell = createTemplatedSliderApplication( + APPLICATION_NAME, + APP_TEMPLATE, + APP_RESOURCE, + [], + launchReportFile) + logShell(shell) + + def appId = ensureYarnApplicationIsUp(launchReportFile) + + expectContainerRequestedCountReached(APPLICATION_NAME, COMMAND_LOGGER, 1, + CONTAINER_LAUNCH_TIMEOUT) + assertContainersLive(APPLICATION_NAME, COMMAND_LOGGER, 1) + + // flex + slider(EXIT_SUCCESS, + [ + ACTION_FLEX, + APPLICATION_NAME, + ARG_COMPONENT, + COMMAND_LOGGER, + "3" + ]) + + // spin till the flexed instance starts + ensureYarnApplicationIsUp(appId) + expectContainerRequestedCountReached(APPLICATION_NAME, COMMAND_LOGGER, 3, + CONTAINER_LAUNCH_TIMEOUT) + + // upgrade spec + describe("Call upgrade spec - spec mismatch with current state") + slider(EXIT_BAD_CONFIGURATION, + [ + ACTION_UPGRADE, + APPLICATION_NAME, + ARG_TEMPLATE, + APP_TEMPLATE, + ARG_RESOURCES, + APP_RESOURCE + ]) + describe("Call upgrade spec - spec mismatch with current state, use --force") + slider(EXIT_SUCCESS, + [ + ACTION_UPGRADE, + APPLICATION_NAME, + ARG_TEMPLATE, + APP_TEMPLATE, + ARG_RESOURCES, + APP_RESOURCE, + ARG_FORCE + ]) + + describe("Check Slider AM goes down and then comes back up") + ensureApplicationNotInState(APPLICATION_NAME, YarnApplicationState.RUNNING) + // Spin till the Slider AM is back up. 
Note: COMMAND_LOGGER + // container count goes down to 1 here (due to spec change) + ensureYarnApplicationIsUp(appId) + describe("COMMAND_LOGGER container requested count should get reset to 0") + expectContainerRequestedCountReached(APPLICATION_NAME, COMMAND_LOGGER, 0, + CONTAINER_LAUNCH_TIMEOUT) + describe("COMMAND_LOGGER container live count should still be 1") + expectLiveContainerCountReached(APPLICATION_NAME, COMMAND_LOGGER, 1, + CONTAINER_LAUNCH_TIMEOUT) + + describe("New AM is back up. Wait for 30 secs to let existing " + + "COMMAND_LOGGER container to heartbeat back to the new AM.") + sleep(1000 * 30) + + // run upgrade container commands + describe("Call upgrade container - invalid container id") + slider(EXIT_NOT_FOUND, + [ + ACTION_UPGRADE, + APPLICATION_NAME, + ARG_CONTAINERS, + "container_1_invalid" + ]) + describe("Call upgrade container - invalid component name") + slider(EXIT_NOT_FOUND, + [ + ACTION_UPGRADE, + APPLICATION_NAME, + ARG_COMPONENTS, + "component_invalid" + ]) + describe("Call upgrade container - valid component name") + slider(EXIT_SUCCESS, + [ + ACTION_UPGRADE, + APPLICATION_NAME, + ARG_COMPONENTS, + COMMAND_LOGGER + ]) + + // verify + describe("COMMAND_LOGGER container failed count should reach 1") + expectFailedContainerCountReached(APPLICATION_NAME, COMMAND_LOGGER, 1, + CONTAINER_LAUNCH_TIMEOUT) + describe("COMMAND_LOGGER container request count should reach 1") + expectContainerRequestedCountReached(APPLICATION_NAME, COMMAND_LOGGER, 1, + CONTAINER_LAUNCH_TIMEOUT) + describe("COMMAND_LOGGER container live count should reach 1") + expectLiveContainerCountReached(APPLICATION_NAME, COMMAND_LOGGER, 1, + CONTAINER_LAUNCH_TIMEOUT) + + def cd = execStatus(APPLICATION_NAME) + assert cd.statistics[COMMAND_LOGGER][ + StatusKeys.STATISTICS_CONTAINERS_LIVE] == 1 + // check liveness + def liveness = cd.liveness + assert liveness.allRequestsSatisfied + assert 0 == liveness.requestsOutstanding + + assertInYarnState(appId, 
YarnApplicationState.RUNNING) + } + +}
