Repository: aurora Updated Branches: refs/heads/master 8a4140de5 -> 2cbaeecce
Prioritize adding instances over updating instances during an update Bugs closed: AURORA-1928 Reviewed at https://reviews.apache.org/r/59640/ Project: http://git-wip-us.apache.org/repos/asf/aurora/repo Commit: http://git-wip-us.apache.org/repos/asf/aurora/commit/2cbaeecc Tree: http://git-wip-us.apache.org/repos/asf/aurora/tree/2cbaeecc Diff: http://git-wip-us.apache.org/repos/asf/aurora/diff/2cbaeecc Branch: refs/heads/master Commit: 2cbaeecced903aecfb8359dbb6b0c1d4d91e6ccb Parents: 8a4140d Author: Jordan Ly <[email protected]> Authored: Fri Jun 2 13:58:42 2017 -0700 Committer: David McLaughlin <[email protected]> Committed: Fri Jun 2 13:58:42 2017 -0700 ---------------------------------------------------------------------- RELEASE-NOTES.md | 3 + docs/features/job-updates.md | 6 +- .../aurora/scheduler/updater/UpdateFactory.java | 66 +++++++++++++++++-- .../aurora/scheduler/updater/JobUpdaterIT.java | 53 +++++++-------- .../updater/UpdateFactoryImplTest.java | 68 ++++++++++++++++++++ 5 files changed, 163 insertions(+), 33 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/aurora/blob/2cbaeecc/RELEASE-NOTES.md ---------------------------------------------------------------------- diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index fc77b02..c73643d 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -35,6 +35,9 @@ due to noisy neighbors or machine issues with a task restart. When you have deterministic bin-packing, they may always end up on the same agent. So be careful enabling this without proper monitoring and remediation of host failures. +- Modified job update behavior to create new instances, then update existing instances, and then + kill unwanted instances. Previously, a job update would modify each instance in the order of + their instance ID. 0.17.0 ====== http://git-wip-us.apache.org/repos/asf/aurora/blob/2cbaeecc/docs/features/job-updates.md ---------------------------------------------------------------------- diff --git a/docs/features/job-updates.md b/docs/features/job-updates.md index 60968ae..b52eb35 100644 --- a/docs/features/job-updates.md +++ b/docs/features/job-updates.md @@ -37,16 +37,16 @@ in the current (possibly partially-updated) state. For a configuration update, the Aurora Scheduler calculates required changes by examining the current job config state and the new desired job config. It then starts a *rolling batched update process* by going through every batch -and performing these operations: +and performing these operations, in order: -- If an instance is present in the scheduler but isn't in the new config, - then that instance is killed. - If an instance is not present in the scheduler but is present in the new config, then the instance is created. - If an instance is present in both the scheduler and the new config, then the scheduler diffs both task configs. If it detects any changes, it performs an instance update by killing the old config instance and adds the new config instance. +- If an instance is present in the scheduler but isn't in the new config, + then that instance is killed. The Aurora Scheduler continues through the instance list until all tasks are updated and in `RUNNING`. If the scheduler determines the update is not going http://git-wip-us.apache.org/repos/asf/aurora/blob/2cbaeecc/src/main/java/org/apache/aurora/scheduler/updater/UpdateFactory.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/aurora/scheduler/updater/UpdateFactory.java b/src/main/java/org/apache/aurora/scheduler/updater/UpdateFactory.java index 14c2d2d..b408a65 100644 --- a/src/main/java/org/apache/aurora/scheduler/updater/UpdateFactory.java +++ b/src/main/java/org/apache/aurora/scheduler/updater/UpdateFactory.java @@ -13,6 +13,7 @@ */ package org.apache.aurora.scheduler.updater; +import java.io.Serializable; import java.util.Set; import com.google.common.annotations.VisibleForTesting; @@ -83,12 +84,12 @@ interface UpdateFactory { settings.getUpdateGroupSize() > 0, "Update group size must be positive."); + Set<Integer> currentInstances = expandInstanceIds(instructions.getInitialState()); Set<Integer> desiredInstances = instructions.isSetDesiredState() ? expandInstanceIds(ImmutableSet.of(instructions.getDesiredState())) : ImmutableSet.of(); - Set<Integer> instances = ImmutableSet.copyOf( - Sets.union(expandInstanceIds(instructions.getInitialState()), desiredInstances)); + Set<Integer> instances = ImmutableSet.copyOf(Sets.union(currentInstances, desiredInstances)); ImmutableMap.Builder<Integer, StateEvaluator<Optional<IScheduledTask>>> evaluators = ImmutableMap.builder(); @@ -111,9 +112,10 @@ interface UpdateFactory { clock)); } + Ordering<Integer> updateOrdering = new UpdateOrdering(currentInstances, desiredInstances); Ordering<Integer> updateOrder = rollingForward - ? Ordering.natural() - : Ordering.natural().reverse(); + ? updateOrdering + : updateOrdering.reverse(); UpdateStrategy<Integer> strategy = settings.isWaitForBatchCompletion() ? new BatchStrategy<>(updateOrder, settings.getUpdateGroupSize()) @@ -154,6 +156,62 @@ interface UpdateFactory { } } + /** + * An instance ID ordering that prefers to create new instances first, then update existing + * instances, and finally kill instances. + */ + @VisibleForTesting + class UpdateOrdering extends Ordering<Integer> implements Serializable { + /** + * Associates an instance ID to an action (create, update, or kill) priority. + */ + private final ImmutableMap<Integer, Integer> instanceToActionPriority; + + /** + * Creates an {@link UpdateOrdering}. Determines the action of the instance (create, update, or + * kill) by comparing the current instance IDs against the desired instance IDs after the + * update. + * + * @param currentInstances The current instance IDs. + * @param desiredInstances The desired instance IDs after the update. + */ + UpdateOrdering(Set<Integer> currentInstances, Set<Integer> desiredInstances) { + requireNonNull(desiredInstances); + requireNonNull(currentInstances); + + Set<Integer> toCreate = Sets.difference(desiredInstances, currentInstances); + Set<Integer> toUpdate = Sets.intersection(desiredInstances, currentInstances); + Set<Integer> toKill = Sets.difference(currentInstances, desiredInstances); + + // Build a mapping of ordering priority (lower is more important) to the instance action + // group. Then, we invert it for easy lookup of instance ID to priority. + ImmutableMap.Builder<Integer, Integer> builder = new ImmutableMap.Builder<>(); + ImmutableMap.of( + 1, toCreate, + 2, toUpdate, + 3, toKill + ).forEach((priority, instances) -> instances.forEach(id -> builder.put(id, priority))); + this.instanceToActionPriority = builder.build(); + } + + /** + * {@inheritDoc} + */ + @Override + public int compare(Integer a, Integer b) { + Integer aActionPriority = instanceToActionPriority.get(a); + Integer bActionPriority = instanceToActionPriority.get(b); + + // Try to order by the instance's action. + if (!aActionPriority.equals(bActionPriority)) { + return Integer.compare(aActionPriority, bActionPriority); + } + + // If it is the same action, order the IDs numerically. + return Integer.compare(a, b); + } + } + class Update { private final OneWayJobUpdater<Integer, Optional<IScheduledTask>> updater; private final JobUpdateStatus successStatus; http://git-wip-us.apache.org/repos/asf/aurora/blob/2cbaeecc/src/test/java/org/apache/aurora/scheduler/updater/JobUpdaterIT.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/aurora/scheduler/updater/JobUpdaterIT.java b/src/test/java/org/apache/aurora/scheduler/updater/JobUpdaterIT.java index 290385d..04a6e1e 100644 --- a/src/test/java/org/apache/aurora/scheduler/updater/JobUpdaterIT.java +++ b/src/test/java/org/apache/aurora/scheduler/updater/JobUpdaterIT.java @@ -917,17 +917,17 @@ public class JobUpdaterIT extends EasyMockTest { ImmutableMultimap.Builder<Integer, JobUpdateAction> actions = ImmutableMultimap.builder(); - // Instance 0 is updated. + // Instance 1 is added. updater.start(update, AUDIT); - actions.putAll(0, INSTANCE_UPDATING); + actions.putAll(1, INSTANCE_UPDATING); assertState(ROLLING_FORWARD, actions.build()); - changeState(JOB, 0, KILLED, ASSIGNED, STARTING, RUNNING); + changeState(JOB, 1, ASSIGNED, STARTING, RUNNING); clock.advance(WATCH_TIMEOUT); - // Instance 1 is added. - changeState(JOB, 1, ASSIGNED, STARTING, RUNNING); - actions.putAll(0, INSTANCE_UPDATED) - .putAll(1, INSTANCE_UPDATING, INSTANCE_UPDATED); + // Instance 0 is updated. + changeState(JOB, 0, KILLED, ASSIGNED, STARTING, RUNNING); + actions.putAll(1, INSTANCE_UPDATED) + .putAll(0, INSTANCE_UPDATING, INSTANCE_UPDATED); clock.advance(WATCH_TIMEOUT); // Instance 2 is updated, but fails. @@ -940,7 +940,7 @@ public class JobUpdaterIT extends EasyMockTest { assertState(ROLLING_BACK, actions.build()); assertLatestUpdateMessage(JobUpdateControllerImpl.failureMessage(2, Failure.EXITED)); changeState(JOB, 2, ASSIGNED, STARTING, RUNNING); - actions.putAll(1, INSTANCE_ROLLING_BACK) + actions.putAll(0, INSTANCE_ROLLING_BACK) .putAll(2, INSTANCE_ROLLED_BACK); clock.advance(WATCH_TIMEOUT); @@ -951,14 +951,14 @@ public class JobUpdaterIT extends EasyMockTest { updater.resume(UPDATE_ID, AUDIT); assertState(ROLLING_BACK, actions.build()); - // Instance 1 is removed. - changeState(JOB, 1, KILLED); - actions.putAll(1, INSTANCE_ROLLED_BACK); - clock.advance(WATCH_TIMEOUT); - // Instance 0 is rolled back. changeState(JOB, 0, KILLED, ASSIGNED, STARTING, RUNNING); - actions.putAll(0, INSTANCE_ROLLING_BACK, INSTANCE_ROLLED_BACK); + actions.putAll(0, INSTANCE_ROLLED_BACK); + clock.advance(WATCH_TIMEOUT); + + // Instance 1 is removed. + changeState(JOB, 1, KILLED); + actions.putAll(1, INSTANCE_ROLLING_BACK, INSTANCE_ROLLED_BACK); clock.advance(WATCH_TIMEOUT); assertState(ROLLED_BACK, actions.build()); @@ -986,17 +986,17 @@ public class JobUpdaterIT extends EasyMockTest { ImmutableMultimap.Builder<Integer, JobUpdateAction> actions = ImmutableMultimap.builder(); - // Instance 0 is updated. + // Instance 1 is added. updater.start(update, AUDIT); - actions.putAll(0, INSTANCE_UPDATING); + actions.putAll(1, INSTANCE_UPDATING); assertState(ROLLING_FORWARD, actions.build()); - changeState(JOB, 0, KILLED, ASSIGNED, STARTING, RUNNING); + changeState(JOB, 1, ASSIGNED, STARTING, RUNNING); clock.advance(WATCH_TIMEOUT); - // Instance 1 is added. - changeState(JOB, 1, ASSIGNED, STARTING, RUNNING); - actions.putAll(0, INSTANCE_UPDATED) - .putAll(1, INSTANCE_UPDATING, INSTANCE_UPDATED); + // Instance 0 is updated. + changeState(JOB, 0, KILLED, ASSIGNED, STARTING, RUNNING); + actions.putAll(1, INSTANCE_UPDATED) + .putAll(0, INSTANCE_UPDATING, INSTANCE_UPDATED); clock.advance(WATCH_TIMEOUT); // Instance 2 is updated, but fails. @@ -1045,11 +1045,12 @@ public class JobUpdaterIT extends EasyMockTest { control.replay(); IJobUpdate update = makeJobUpdate( - makeInstanceConfig(0, 1, OLD_CONFIG)); + makeInstanceConfig(0, 2, OLD_CONFIG)); insertInitialTasks(update); changeState(JOB, 0, ASSIGNED, STARTING, RUNNING); changeState(JOB, 1, ASSIGNED, STARTING, RUNNING); + changeState(JOB, 2, ASSIGNED, STARTING, RUNNING); clock.advance(WATCH_TIMEOUT); ImmutableMultimap.Builder<Integer, JobUpdateAction> actions = ImmutableMultimap.builder(); @@ -1077,7 +1078,7 @@ public class JobUpdaterIT extends EasyMockTest { actions.putAll(1, INSTANCE_ROLLBACK_FAILED); assertState(JobUpdateStatus.FAILED, actions.build()); clock.advance(WATCH_TIMEOUT); - assertJobState(JOB, ImmutableMap.of(0, NEW_CONFIG, 1, OLD_CONFIG)); + assertJobState(JOB, ImmutableMap.of(0, NEW_CONFIG, 1, OLD_CONFIG, 2, OLD_CONFIG)); } private void releaseAllLocks() { @@ -1093,7 +1094,7 @@ public class JobUpdaterIT extends EasyMockTest { control.replay(); IJobUpdate update = makeJobUpdate( - makeInstanceConfig(0, 1, OLD_CONFIG)); + makeInstanceConfig(0, 2, OLD_CONFIG)); insertInitialTasks(update); changeState(JOB, 0, ASSIGNED, STARTING, RUNNING); @@ -1390,7 +1391,7 @@ public class JobUpdaterIT extends EasyMockTest { control.replay(); - IJobUpdate update = makeJobUpdate(makeInstanceConfig(0, 0, OLD_CONFIG)); + IJobUpdate update = makeJobUpdate(makeInstanceConfig(0, 2, OLD_CONFIG)); insertInitialTasks(update); changeState(JOB, 0, ASSIGNED, STARTING, RUNNING); @@ -1417,7 +1418,7 @@ public class JobUpdaterIT extends EasyMockTest { control.replay(); - IJobUpdate update = makeJobUpdate(makeInstanceConfig(0, 0, OLD_CONFIG)); + IJobUpdate update = makeJobUpdate(makeInstanceConfig(0, 2, OLD_CONFIG)); insertInitialTasks(update); changeState(JOB, 0, ASSIGNED, STARTING, RUNNING); http://git-wip-us.apache.org/repos/asf/aurora/blob/2cbaeecc/src/test/java/org/apache/aurora/scheduler/updater/UpdateFactoryImplTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/aurora/scheduler/updater/UpdateFactoryImplTest.java b/src/test/java/org/apache/aurora/scheduler/updater/UpdateFactoryImplTest.java index 611f6b8..01bdcf1 100644 --- a/src/test/java/org/apache/aurora/scheduler/updater/UpdateFactoryImplTest.java +++ b/src/test/java/org/apache/aurora/scheduler/updater/UpdateFactoryImplTest.java @@ -13,7 +13,10 @@ */ package org.apache.aurora.scheduler.updater; +import java.util.Set; + import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Lists; import org.apache.aurora.common.util.testing.FakeClock; import org.apache.aurora.gen.InstanceTaskConfig; @@ -27,6 +30,7 @@ import org.junit.Test; import static org.apache.aurora.scheduler.updater.UpdateFactory.Update; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; /** * This test can't exercise much functionality of the output from the factory without duplicating @@ -95,6 +99,70 @@ public class UpdateFactoryImplTest { assertEquals(ImmutableSet.of(0, 1, 2), update.getUpdater().getInstances()); } + @Test + public void testUpdateOrdering() throws Exception { + Set<Integer> currentInstances = ImmutableSet.of(0, 1, 2, 3); + Set<Integer> desiredInstances = ImmutableSet.of(0, 1, 2, 3); + + UpdateFactory.UpdateOrdering ordering = new UpdateFactory.UpdateOrdering( + currentInstances, + desiredInstances); + + // Rolling forward + assertTrue(ordering.isOrdered(Lists.newArrayList(0, 1, 2, 3))); + + // Rolling backward + assertTrue(ordering.reverse().isOrdered(Lists.newArrayList(3, 2, 1, 0))); + } + + @Test + public void testUpdateCreateOrdering() throws Exception { + Set<Integer> currentInstances = ImmutableSet.of(0, 1, 2, 3); + Set<Integer> desiredInstances = ImmutableSet.of(0, 1, 2, 3, 4, 5); + + UpdateFactory.UpdateOrdering ordering = new UpdateFactory.UpdateOrdering( + currentInstances, + desiredInstances); + + // Rolling forward + assertTrue(ordering.isOrdered(Lists.newArrayList(4, 5, 0, 1, 2, 3))); + + // Rolling backward + assertTrue(ordering.reverse().isOrdered(Lists.newArrayList(3, 2, 1, 0, 5, 4))); + } + + @Test + public void testUpdateKillOrdering() throws Exception { + Set<Integer> currentInstances = ImmutableSet.of(0, 1, 2, 3); + Set<Integer> desiredInstances = ImmutableSet.of(0, 1); + + UpdateFactory.UpdateOrdering ordering = new UpdateFactory.UpdateOrdering( + currentInstances, + desiredInstances); + + // Rolling forward + assertTrue(ordering.isOrdered(Lists.newArrayList(0, 1, 2, 3))); + + // Rolling backward + assertTrue(ordering.reverse().isOrdered(Lists.newArrayList(3, 2, 1, 0))); + } + + @Test + public void testCreateUpdateKillOrdering() throws Exception { + Set<Integer> currentInstances = ImmutableSet.of(0, 2, 4, 5, 6); + Set<Integer> desiredInstances = ImmutableSet.of(0, 1, 2, 3); + + UpdateFactory.UpdateOrdering ordering = new UpdateFactory.UpdateOrdering( + currentInstances, + desiredInstances); + + // Rolling forward + assertTrue(ordering.isOrdered(Lists.newArrayList(1, 3, 0, 2, 5, 6))); + + // Rolling backward + assertTrue(ordering.reverse().isOrdered(Lists.newArrayList(6, 5, 2, 0, 3, 1))); + } + private static InstanceTaskConfig instanceConfig(Range instances) { return new InstanceTaskConfig() .setTask(new TaskConfig())
