SLIDER-327 AM to move to optimized, queued Node review and refresh action
Project: http://git-wip-us.apache.org/repos/asf/incubator-slider/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-slider/commit/88b665db Tree: http://git-wip-us.apache.org/repos/asf/incubator-slider/tree/88b665db Diff: http://git-wip-us.apache.org/repos/asf/incubator-slider/diff/88b665db Branch: refs/heads/feature/SLIDER-280_Restart_AM_fun_tests Commit: 88b665db938325bf2c221614f7ffadefe71be632 Parents: 82cf1f0 Author: Steve Loughran <[email protected]> Authored: Tue Sep 2 21:01:50 2014 +0100 Committer: Steve Loughran <[email protected]> Committed: Tue Sep 2 21:01:50 2014 +0100 ---------------------------------------------------------------------- .../org/apache/slider/client/SliderClient.java | 9 +-- .../server/appmaster/RoleLaunchService.java | 8 +-- .../server/appmaster/SliderAppMaster.java | 66 ++++++++++++++------ .../server/appmaster/actions/ActionHalt.java | 6 +- .../appmaster/actions/ActionKillContainer.java | 20 +++++- .../appmaster/actions/ActionStartContainer.java | 8 ++- .../appmaster/actions/ActionStopSlider.java | 6 +- .../server/appmaster/actions/AsyncAction.java | 47 ++++---------- .../actions/ProviderReportedContainerLoss.java | 13 ++-- .../actions/ProviderStartupCompleted.java | 4 -- .../server/appmaster/actions/QueueAccess.java | 5 ++ .../server/appmaster/actions/QueueService.java | 11 +++- .../actions/RegisterComponentInstance.java | 5 ++ .../actions/ReviewAndFlexApplicationSize.java | 43 +++++++++++++ .../actions/UnregisterComponentInstance.java | 7 ++- .../server/appmaster/monkey/ChaosEntry.java | 2 +- .../TestStandaloneAMMonkeyRestart.groovy | 8 +-- .../providers/accumulo/AccumuloTestBase.groovy | 4 +- .../minicluster/HBaseMiniClusterTestBase.groovy | 1 - .../flexing/TestClusterFlex1To1.groovy | 2 +- 20 files changed, 176 insertions(+), 99 deletions(-) ---------------------------------------------------------------------- 
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/88b665db/slider-core/src/main/java/org/apache/slider/client/SliderClient.java ---------------------------------------------------------------------- diff --git a/slider-core/src/main/java/org/apache/slider/client/SliderClient.java b/slider-core/src/main/java/org/apache/slider/client/SliderClient.java index f0da136..f6d761e 100644 --- a/slider-core/src/main/java/org/apache/slider/client/SliderClient.java +++ b/slider-core/src/main/java/org/apache/slider/client/SliderClient.java @@ -2002,12 +2002,9 @@ public class SliderClient extends AbstractSliderLaunchedService implements RunSe log.info("Flexing running cluster"); SliderClusterProtocol appMaster = connect(instance); SliderClusterOperations clusterOps = new SliderClusterOperations(appMaster); - if (clusterOps.flex(instanceDefinition.getResources())) { - log.info("Cluster size updated"); - exitCode = EXIT_SUCCESS; - } else { - log.info("Requested size is the same as current size: no change"); - } + clusterOps.flex(instanceDefinition.getResources()); + log.info("application instance size updated"); + exitCode = EXIT_SUCCESS; } else { log.info("No running instance to update"); } http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/88b665db/slider-core/src/main/java/org/apache/slider/server/appmaster/RoleLaunchService.java ---------------------------------------------------------------------- diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/RoleLaunchService.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/RoleLaunchService.java index e8b6802..37824c8 100644 --- a/slider-core/src/main/java/org/apache/slider/server/appmaster/RoleLaunchService.java +++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/RoleLaunchService.java @@ -29,7 +29,6 @@ import org.apache.slider.core.launch.ContainerLauncher; import org.apache.slider.providers.ProviderRole; import org.apache.slider.providers.ProviderService; 
import org.apache.slider.server.appmaster.actions.ActionStartContainer; -import org.apache.slider.server.appmaster.actions.AsyncAction; import org.apache.slider.server.appmaster.actions.QueueAccess; import org.apache.slider.server.appmaster.state.RoleInstance; import org.apache.slider.server.appmaster.state.RoleStatus; @@ -39,9 +38,9 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.util.Map; -import java.util.Queue; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; /** * A service for launching containers @@ -215,9 +214,8 @@ public class RoleLaunchService instance.roleId = role.id; instance.environment = envDescription; actionQueue.put(new ActionStartContainer("starting " + containerRole, - 0, container, - containerLauncher.completeContainerLaunch(), - instance)); + container, containerLauncher.completeContainerLaunch(), instance, 0, + TimeUnit.MILLISECONDS)); } catch (Exception e) { log.error("Exception thrown while trying to start {}: {}", containerRole, e); http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/88b665db/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java ---------------------------------------------------------------------- diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java index 51c3b93..9ccb382 100644 --- a/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java +++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java @@ -106,6 +106,7 @@ import org.apache.slider.server.appmaster.actions.ActionStopSlider; import org.apache.slider.server.appmaster.actions.AsyncAction; import org.apache.slider.server.appmaster.actions.RenewingAction; import org.apache.slider.server.appmaster.actions.ResetFailureWindow; +import 
org.apache.slider.server.appmaster.actions.ReviewAndFlexApplicationSize; import org.apache.slider.server.appmaster.actions.UnregisterComponentInstance; import org.apache.slider.server.appmaster.monkey.ChaosKillAM; import org.apache.slider.server.appmaster.monkey.ChaosKillContainer; @@ -160,9 +161,6 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.locks.Condition; import java.util.concurrent.locks.ReentrantLock; -import static org.apache.slider.server.appmaster.web.rest.RestPaths.WS_AGENT_CONTEXT_ROOT; -import static org.apache.slider.server.appmaster.web.rest.RestPaths.WS_CONTEXT_ROOT; - /** * This is the AM, which directly implements the callbacks from the AM and NM */ @@ -1211,11 +1209,7 @@ public class SliderAppMaster extends AbstractSliderLaunchedService queue(new UnregisterComponentInstance(containerId, 0, TimeUnit.MILLISECONDS)); } - try { - reviewRequestAndReleaseNodes(); - } catch (SliderInternalStateException e) { - log.warn("Exception while flexing nodes", e); - } + reviewRequestAndReleaseNodes("onContainersCompleted"); } /** @@ -1223,10 +1217,9 @@ public class SliderAppMaster extends AbstractSliderLaunchedService * It should be the only way that anything -even the AM itself on startup- * asks for nodes. 
* @param resources the resource tree - * @return true if the any requests were made * @throws IOException */ - private boolean flexCluster(ConfTree resources) + private void flexCluster(ConfTree resources) throws IOException, SliderInternalStateException, BadConfigException { appState.updateResourceDefinitions(resources); @@ -1238,7 +1231,7 @@ public class SliderAppMaster extends AbstractSliderLaunchedService // ask for more containers if needed - return reviewRequestAndReleaseNodes(); + reviewRequestAndReleaseNodes("flexCluster"); } /** @@ -1269,13 +1262,47 @@ public class SliderAppMaster extends AbstractSliderLaunchedService /** * Look at where the current node state is -and whether it should be changed + * @param reason + */ + private synchronized void reviewRequestAndReleaseNodes(String reason) { + log.debug("reviewRequestAndReleaseNodes({})", reason); + queue(new ReviewAndFlexApplicationSize(reason, 0, TimeUnit.SECONDS)); + } + + /** + * Handle the event requesting a review ... look at the queue and decide + * whether to act or not + * @param action action triggering the event. 
It may be put + * back into the queue + * @throws SliderInternalStateException */ - private synchronized boolean reviewRequestAndReleaseNodes() + public void handleReviewAndFlexApplicationSize(ReviewAndFlexApplicationSize action) throws SliderInternalStateException { - log.debug("in reviewRequestAndReleaseNodes()"); + + if ( actionQueues.hasQueuedActionWithAttribute( + AsyncAction.ATTR_REVIEWS_APP_SIZE | AsyncAction.ATTR_HALTS_APP)) { + // this operation isn't needed at all -existing duplicate or shutdown due + return; + } + // if there is an action which changes cluster size, wait + if (actionQueues.hasQueuedActionWithAttribute( + AsyncAction.ATTR_CHANGES_APP_SIZE)) { + // place the action at the back of the queue + actionQueues.put(action); + } + + executeNodeReview(action.name); + } + + /** + * Look at where the current node state is -and whether it should be changed + */ + public synchronized void executeNodeReview(String reason) + throws SliderInternalStateException { + + log.debug("in executeNodeReview({})", reason); if (amCompletionFlag.get()) { log.info("Ignoring node review operation: shutdown in progress"); - return false; } try { List<AbstractRMOperation> allOperations = appState.reviewRequestAndReleaseNodes(); @@ -1283,16 +1310,17 @@ public class SliderAppMaster extends AbstractSliderLaunchedService providerRMOperationHandler.execute(allOperations); //now apply the operations executeRMOperations(allOperations); - return !allOperations.isEmpty(); } catch (TriggerClusterTeardownException e) { //App state has decided that it is time to exit log.error("Cluster teardown triggered %s", e); signalAMComplete(e.getExitCode(), e.toString()); - return false; } } + + + /** * Shutdown operation: release all containers */ @@ -1383,8 +1411,8 @@ public class SliderAppMaster extends AbstractSliderLaunchedService String payload = request.getClusterSpec(); ConfTreeSerDeser confTreeSerDeser = new ConfTreeSerDeser(); ConfTree updatedResources = 
confTreeSerDeser.fromJson(payload); - boolean flexed = flexCluster(updatedResources); - return Messages.FlexClusterResponseProto.newBuilder().setResponse(flexed).build(); + flexCluster(updatedResources); + return Messages.FlexClusterResponseProto.newBuilder().setResponse(true).build(); } @Override //SliderClusterProtocol @@ -1609,7 +1637,7 @@ public class SliderAppMaster extends AbstractSliderLaunchedService executeRMOperations(appState.releaseContainer(containerId)); // ask for more containers if needed log.info("Container released; triggering review"); - reviewRequestAndReleaseNodes(); + reviewRequestAndReleaseNodes("Loss of container"); } else { log.info("Container not in active set - ignoring"); } http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/88b665db/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionHalt.java ---------------------------------------------------------------------- diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionHalt.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionHalt.java index c21e249..e2ad559 100644 --- a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionHalt.java +++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionHalt.java @@ -25,7 +25,7 @@ import org.apache.slider.server.appmaster.state.AppState; import java.util.concurrent.TimeUnit; /** - * Exit a JVM halt. + * Exit an emergency JVM halt. * @see ExitUtil#halt(int, String) */ public class ActionHalt extends AsyncAction { @@ -37,7 +37,9 @@ public class ActionHalt extends AsyncAction { int status, String text, long delay, TimeUnit timeUnit) { - super("Halt", delay, ActionAttributes.HALTS_CLUSTER); + + // do not declare that this action halts the cluster ... 
keep it a surprise + super("Halt", delay, timeUnit); this.status = status; this.text = text; } http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/88b665db/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionKillContainer.java ---------------------------------------------------------------------- diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionKillContainer.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionKillContainer.java index c1e7e6e..95bf067 100644 --- a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionKillContainer.java +++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionKillContainer.java @@ -30,16 +30,34 @@ import java.util.LinkedList; import java.util.List; import java.util.concurrent.TimeUnit; +/** + * Kill a specific container + */ public class ActionKillContainer extends AsyncAction { + /** + * container to kill + */ private final ContainerId containerId; + + /** + * handler for the operation + */ private final RMOperationHandler operationHandler; + + /** + * Kill a container + * @param containerId container to kill + * @param delay + * @param timeUnit + * @param operationHandler + */ public ActionKillContainer( ContainerId containerId, long delay, TimeUnit timeUnit, RMOperationHandler operationHandler) { - super("kill container", delay, timeUnit); + super("kill container", delay, timeUnit, ATTR_CHANGES_APP_SIZE); this.operationHandler = operationHandler; Preconditions.checkArgument(containerId != null); http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/88b665db/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionStartContainer.java ---------------------------------------------------------------------- diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionStartContainer.java 
b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionStartContainer.java index d95dc74..358c844 100644 --- a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionStartContainer.java +++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionStartContainer.java @@ -25,6 +25,7 @@ import org.apache.slider.server.appmaster.state.AppState; import org.apache.slider.server.appmaster.state.RoleInstance; import java.util.Locale; +import java.util.concurrent.TimeUnit; /** * Start a container @@ -37,15 +38,16 @@ public class ActionStartContainer extends AsyncAction { private final RoleInstance instance; public ActionStartContainer(String name, - long delay, Container container, ContainerLaunchContext ctx, - RoleInstance instance) { + RoleInstance instance, + long delay, TimeUnit timeUnit) { super( String.format(Locale.ENGLISH, "%s %s: /", name , container.getId().toString()), - delay); + delay, + timeUnit); this.container = container; this.ctx = ctx; this.instance = instance; http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/88b665db/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionStopSlider.java ---------------------------------------------------------------------- diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionStopSlider.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionStopSlider.java index f084383..64b8e9e 100644 --- a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionStopSlider.java +++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionStopSlider.java @@ -25,15 +25,11 @@ import org.apache.slider.server.appmaster.state.AppState; import java.util.concurrent.TimeUnit; public class ActionStopSlider extends AsyncAction { - public ActionStopSlider(String message, - long delay) { - super(message, delay, ActionAttributes.HALTS_CLUSTER); - } public 
ActionStopSlider(String name, long delay, TimeUnit timeUnit) { - super(name, delay, timeUnit, ActionAttributes.HALTS_CLUSTER); + super(name, delay, timeUnit, ATTR_HALTS_APP); } @Override http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/88b665db/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/AsyncAction.java ---------------------------------------------------------------------- diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/AsyncAction.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/AsyncAction.java index 996390d..c8db42d 100644 --- a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/AsyncAction.java +++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/AsyncAction.java @@ -23,8 +23,6 @@ import org.apache.slider.server.appmaster.SliderAppMaster; import org.apache.slider.server.appmaster.state.AppState; import java.io.IOException; -import java.util.Collections; -import java.util.EnumSet; import java.util.concurrent.Delayed; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; @@ -35,7 +33,7 @@ public abstract class AsyncAction implements Delayed { public final String name; private long nanos; - private final EnumSet<ActionAttributes> attrs; + public final int attrs; private final long sequenceNumber = sequencer.incrementAndGet(); @@ -51,34 +49,18 @@ public abstract class AsyncAction implements Delayed { protected AsyncAction(String name, long delay, TimeUnit timeUnit) { - this.name = name; - this.setNanos(convertAndOffset(delay, timeUnit)); - attrs = EnumSet.noneOf(ActionAttributes.class); + this(name, delay, timeUnit, 0); } protected AsyncAction(String name, long delay, TimeUnit timeUnit, - EnumSet<ActionAttributes> attrs) { + int attrs) { this.name = name; this.setNanos(convertAndOffset(delay, timeUnit)); this.attrs = attrs; } - protected AsyncAction(String name, - long delay, - TimeUnit timeUnit, - 
ActionAttributes... attributes) { - this(name, delay, timeUnit); - Collections.addAll(attrs, attributes); - } - - protected AsyncAction(String name, - long delayMillis, - ActionAttributes... attributes) { - this(name, delayMillis, TimeUnit.MILLISECONDS); - } - protected long convertAndOffset(long delay, TimeUnit timeUnit) { return now() + TimeUnit.NANOSECONDS.convert(delay, timeUnit); } @@ -118,17 +100,18 @@ public abstract class AsyncAction implements Delayed { return sb.toString(); } - protected EnumSet<ActionAttributes> getAttrs() { + protected int getAttrs() { return attrs; } /** - * Ask if an action has a specific attribute + * Ask if an action has any of the specified bits set. + * This is not an equality test. * @param attr attribute - * @return true iff the action has the specific attribute + * @return true iff the action has any of the bits in the attr arg set */ - public boolean hasAttr(ActionAttributes attr) { - return attrs.contains(attr); + public boolean hasAttr(int attr) { + return (attrs & attr) != 0; } /** @@ -148,12 +131,8 @@ public abstract class AsyncAction implements Delayed { public void setNanos(long nanos) { this.nanos = nanos; } - - public enum ActionAttributes { - SHRINKS_CLUSTER, - EXPANDS_CLUSTER, - HALTS_CLUSTER, - } - - + + public static final int ATTR_CHANGES_APP_SIZE = 1; + public static final int ATTR_HALTS_APP = 2; + public static final int ATTR_REVIEWS_APP_SIZE = 4; } http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/88b665db/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ProviderReportedContainerLoss.java ---------------------------------------------------------------------- diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ProviderReportedContainerLoss.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ProviderReportedContainerLoss.java index 2aa67bb..41fe494 100644 --- 
a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ProviderReportedContainerLoss.java +++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ProviderReportedContainerLoss.java @@ -22,6 +22,8 @@ import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.slider.server.appmaster.SliderAppMaster; import org.apache.slider.server.appmaster.state.AppState; +import java.util.concurrent.TimeUnit; + /** * Report container loss to the AM * {@link SliderAppMaster#providerLostContainer(ContainerId)} @@ -31,13 +33,14 @@ public class ProviderReportedContainerLoss extends AsyncAction { private final ContainerId containerId; public ProviderReportedContainerLoss(ContainerId containerId) { - super("lost container " + containerId); - this.containerId = containerId; + this("lost container", 0, TimeUnit.MILLISECONDS, containerId); } - public ProviderReportedContainerLoss( - ContainerId containerId, long delayMillis) { - super("lost container " + containerId, delayMillis); + public ProviderReportedContainerLoss(String name, + long delay, + TimeUnit timeUnit, + ContainerId containerId) { + super(name, delay, timeUnit); this.containerId = containerId; } http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/88b665db/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ProviderStartupCompleted.java ---------------------------------------------------------------------- diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ProviderStartupCompleted.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ProviderStartupCompleted.java index 4577025..957a35f 100644 --- a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ProviderStartupCompleted.java +++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ProviderStartupCompleted.java @@ -27,10 +27,6 @@ public class ProviderStartupCompleted extends AsyncAction { 
super("ProviderStartupCompleted"); } - public ProviderStartupCompleted(long delayMillis) { - super("ProviderStartupCompleted", delayMillis); - } - @Override public void execute(SliderAppMaster appMaster, QueueAccess queueService, http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/88b665db/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/QueueAccess.java ---------------------------------------------------------------------- diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/QueueAccess.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/QueueAccess.java index cffaf5e..0396891 100644 --- a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/QueueAccess.java +++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/QueueAccess.java @@ -64,4 +64,9 @@ public interface QueueAccess { * @return true if the action was found and removed. */ boolean removeRenewingAction(String name); + + /** + * Look in the immediate queue for any actions of a specific attribute + */ + boolean hasQueuedActionWithAttribute(int attr); } http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/88b665db/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/QueueService.java ---------------------------------------------------------------------- diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/QueueService.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/QueueService.java index 6ad579d..5b24a35 100644 --- a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/QueueService.java +++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/QueueService.java @@ -19,7 +19,6 @@ package org.apache.slider.server.appmaster.actions; -import com.google.common.annotations.VisibleForTesting; import org.apache.slider.server.services.workflow.ServiceThreadFactory; import 
org.apache.slider.server.services.workflow.WorkflowExecutorService; import org.slf4j.Logger; @@ -149,6 +148,16 @@ implements Runnable, QueueAccess { } } } + + @Override + public boolean hasQueuedActionWithAttribute(int attr) { + for (AsyncAction action : actionQueue) { + if (action.hasAttr(attr)) { + return true; + } + } + return false; + } /** * Run until the queue has been told to stop http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/88b665db/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/RegisterComponentInstance.java ---------------------------------------------------------------------- diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/RegisterComponentInstance.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/RegisterComponentInstance.java index a8a6fe2..543c1a8 100644 --- a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/RegisterComponentInstance.java +++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/RegisterComponentInstance.java @@ -25,6 +25,11 @@ import org.apache.slider.server.appmaster.state.AppState; import java.util.concurrent.TimeUnit; +/** + * Notify the app master that it should register a component instance + * in the registry + * {@link SliderAppMaster#registerComponent(ContainerId)} + */ public class RegisterComponentInstance extends AsyncAction { http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/88b665db/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ReviewAndFlexApplicationSize.java ---------------------------------------------------------------------- diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ReviewAndFlexApplicationSize.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ReviewAndFlexApplicationSize.java new file mode 100644 index 0000000..273f599 --- /dev/null +++ 
b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ReviewAndFlexApplicationSize.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.slider.server.appmaster.actions; + +import org.apache.slider.server.appmaster.SliderAppMaster; +import org.apache.slider.server.appmaster.state.AppState; + +import java.util.concurrent.TimeUnit; + +/** + * Tell the AM to execute the full flex review operation + */ +public class ReviewAndFlexApplicationSize extends AsyncAction{ + + public ReviewAndFlexApplicationSize(String name, + long delay, + TimeUnit timeUnit) { + super(name, delay, timeUnit, ATTR_REVIEWS_APP_SIZE); + } + + @Override + public void execute(SliderAppMaster appMaster, + QueueAccess queueService, + AppState appState) throws Exception { + appMaster.handleReviewAndFlexApplicationSize(this); + } +} http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/88b665db/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/UnregisterComponentInstance.java ---------------------------------------------------------------------- diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/UnregisterComponentInstance.java 
b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/UnregisterComponentInstance.java index 78d9c1c..575fe8f 100644 --- a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/UnregisterComponentInstance.java +++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/UnregisterComponentInstance.java @@ -24,12 +24,17 @@ import org.apache.slider.server.appmaster.state.AppState; import java.util.concurrent.TimeUnit; +/** + * Tell AM to unregister this component instance + * {@link SliderAppMaster#unregisterComponent(ContainerId)} + */ public class UnregisterComponentInstance extends AsyncAction { public final ContainerId containerId; - public UnregisterComponentInstance(ContainerId containerId, long delay, + public UnregisterComponentInstance(ContainerId containerId, + long delay, TimeUnit timeUnit) { super("UnregisterComponentInstance :" + containerId.toString(), delay, timeUnit); http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/88b665db/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosEntry.java ---------------------------------------------------------------------- diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosEntry.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosEntry.java index 5905d2f..2869fe9 100644 --- a/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosEntry.java +++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosEntry.java @@ -52,7 +52,7 @@ public class ChaosEntry { Preconditions.checkArgument(target != null, "null target"); Preconditions.checkArgument(probability > 0, "negative probability"); Preconditions.checkArgument(probability <= ChaosMonkeyService.PERCENT_100, - "probability over 100%"); + "probability over 100%: "+ probability); this.name = name; this.target = target; this.probability = probability; 
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/88b665db/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneAMMonkeyRestart.groovy ---------------------------------------------------------------------- diff --git a/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneAMMonkeyRestart.groovy b/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneAMMonkeyRestart.groovy index 5b17cfa..b37b9a6 100644 --- a/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneAMMonkeyRestart.groovy +++ b/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneAMMonkeyRestart.groovy @@ -20,28 +20,22 @@ package org.apache.slider.agent.standalone import groovy.transform.CompileStatic import groovy.util.logging.Slf4j -import org.apache.hadoop.SleepJob import org.apache.hadoop.yarn.api.records.ApplicationReport import org.apache.hadoop.yarn.api.records.FinalApplicationStatus import org.apache.hadoop.yarn.api.records.YarnApplicationState import org.apache.hadoop.yarn.conf.YarnConfiguration import org.apache.slider.agent.AgentMiniClusterTestBase import org.apache.slider.api.InternalKeys -import org.apache.slider.api.ResourceKeys import org.apache.slider.client.SliderClient import org.apache.slider.common.SliderXmlConfKeys -import org.apache.slider.common.params.ActionAMSuicideArgs import org.apache.slider.common.params.Arguments -import org.apache.slider.core.exceptions.ErrorStrings import org.apache.slider.core.main.ServiceLauncher import org.junit.Test @CompileStatic @Slf4j - class TestStandaloneAMMonkeyRestart extends AgentMiniClusterTestBase { - @Test public void testStandaloneAMMonkeyRestart() throws Throwable { describe "Run a Standalone AM with the Chaos monkey set to kill it" @@ -55,7 +49,7 @@ class TestStandaloneAMMonkeyRestart extends AgentMiniClusterTestBase { [ Arguments.ARG_OPTION, InternalKeys.CHAOS_MONKEY_ENABLED, "true", Arguments.ARG_OPTION, 
InternalKeys.CHAOS_MONKEY_INTERVAL_SECONDS, "8", - Arguments.ARG_OPTION, InternalKeys.CHAOS_MONKEY_PROBABILITY_AM_FAILURE, "75000", + Arguments.ARG_OPTION, InternalKeys.CHAOS_MONKEY_PROBABILITY_AM_FAILURE, "7500", ], true, false) http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/88b665db/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/AccumuloTestBase.groovy ---------------------------------------------------------------------- diff --git a/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/AccumuloTestBase.groovy b/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/AccumuloTestBase.groovy index 93b2798..3c5606b 100644 --- a/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/AccumuloTestBase.groovy +++ b/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/AccumuloTestBase.groovy @@ -228,9 +228,7 @@ public abstract class AccumuloTestBase extends YarnZKMiniClusterTestBase { //now flex describe( "Flexing " + roleMapToString(flexTarget)); - boolean flexed = 0 == sliderClient.flex(clustername, - flexTarget - ); + sliderClient.flex(clustername, flexTarget); cd = waitForRoleCount(sliderClient, flexTarget, accumulo_cluster_startup_to_live_time); http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/88b665db/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/HBaseMiniClusterTestBase.groovy ---------------------------------------------------------------------- diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/HBaseMiniClusterTestBase.groovy 
b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/HBaseMiniClusterTestBase.groovy index 10de9ac..7712a83 100644 --- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/HBaseMiniClusterTestBase.groovy +++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/HBaseMiniClusterTestBase.groovy @@ -353,7 +353,6 @@ public abstract class HBaseMiniClusterTestBase extends YarnZKMiniClusterTestBase int masterFlexTarget, int workerFlexTarget, boolean testHBaseAfter) { - int flexTarget describe( "Flexing masters -> $masterFlexTarget ; workers -> ${workerFlexTarget}"); boolean flexed; http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/88b665db/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlex1To1.groovy ---------------------------------------------------------------------- diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlex1To1.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlex1To1.groovy index fdbbce8..c1265de 100644 --- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlex1To1.groovy +++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlex1To1.groovy @@ -33,7 +33,7 @@ class TestClusterFlex1To1 extends HBaseMiniClusterTestBase { @Test public void testClusterFlex1To1() throws Throwable { - assert !flexHBaseClusterTestRun( + assert flexHBaseClusterTestRun( "", 1, 1,
