Cluster Maintenance Mode feature support. Helix does not have a state that keeps the existing partitions active without rebalancing partition placement. The only state the controller can switch to is the paused state. If the controller has been paused, none of the replicas in this cluster will be active anymore. It is better to have another mode that lets the current replicas keep functioning without bootstrapping new replicas, for example when the cluster or an instance is full. There are several scenarios that may need such a mode, which keeps the original assignment of partitions without any partition movement. At the same time, no partitions will be assigned for newly added resources. This mode is called cluster maintenance mode.
For more detail, please refer: https://iwww.corp.linkedin.com/wiki/cf/display/ENGS/Cluster+Maintenance+Mode+Design Project: http://git-wip-us.apache.org/repos/asf/helix/repo Commit: http://git-wip-us.apache.org/repos/asf/helix/commit/a7477c3b Tree: http://git-wip-us.apache.org/repos/asf/helix/tree/a7477c3b Diff: http://git-wip-us.apache.org/repos/asf/helix/diff/a7477c3b Branch: refs/heads/master Commit: a7477c3bbc85059b2e522f5caa214c33eb4c3e15 Parents: bd9f7a4 Author: Junkai Xue <[email protected]> Authored: Thu Nov 2 15:00:24 2017 -0700 Committer: Junkai Xue <[email protected]> Committed: Wed Jan 24 18:31:11 2018 -0800 ---------------------------------------------------------------------- .../main/java/org/apache/helix/HelixAdmin.java | 15 ++++ .../org/apache/helix/HelixDataAccessor.java | 2 + .../main/java/org/apache/helix/PropertyKey.java | 9 +++ .../org/apache/helix/PropertyPathBuilder.java | 20 +++-- .../java/org/apache/helix/PropertyType.java | 1 + .../rebalancer/AbstractRebalancer.java | 35 ++++++++- .../rebalancer/DelayedAutoRebalancer.java | 34 -------- .../rebalancer/MaintenanceRebalancer.java | 43 +++++++++++ .../stages/BestPossibleStateCalcStage.java | 17 ++-- .../controller/stages/ClusterDataCache.java | 9 +++ .../apache/helix/manager/zk/ZKHelixAdmin.java | 29 ++++++- .../helix/manager/zk/ZKHelixDataAccessor.java | 8 ++ .../apache/helix/model/MaintenanceSignal.java | 13 ++++ .../java/org/apache/helix/MockAccessor.java | 5 ++ .../controller/TestClusterMaintenanceMode.java | 81 ++++++++++++++++++++ .../org/apache/helix/mock/MockHelixAdmin.java | 8 ++ 16 files changed, 277 insertions(+), 52 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/helix/blob/a7477c3b/helix-core/src/main/java/org/apache/helix/HelixAdmin.java ---------------------------------------------------------------------- diff --git a/helix-core/src/main/java/org/apache/helix/HelixAdmin.java 
b/helix-core/src/main/java/org/apache/helix/HelixAdmin.java index 652ab7a..9562a0b 100644 --- a/helix-core/src/main/java/org/apache/helix/HelixAdmin.java +++ b/helix-core/src/main/java/org/apache/helix/HelixAdmin.java @@ -259,6 +259,21 @@ public interface HelixAdmin { void enableCluster(String clusterName, boolean enabled, String reason); /** + * Enable or disable maintenance mode for a cluster + * @param clusterName + * @param enabled + */ + void enableMaintenanceMode(String clusterName, boolean enabled); + + /** + * Enable or disable maintenance mode for a cluster + * @param clusterName + * @param enabled + * @param reason + */ + void enableMaintenanceMode(String clusterName, boolean enabled, String reason); + + /** * Reset a list of partitions in error state for an instance * The partitions are assume to be in error state and reset will bring them from error * to initial state. An error to initial state transition is required for reset. http://git-wip-us.apache.org/repos/asf/helix/blob/a7477c3b/helix-core/src/main/java/org/apache/helix/HelixDataAccessor.java ---------------------------------------------------------------------- diff --git a/helix-core/src/main/java/org/apache/helix/HelixDataAccessor.java b/helix-core/src/main/java/org/apache/helix/HelixDataAccessor.java index 5c2baec..f8ae131 100644 --- a/helix-core/src/main/java/org/apache/helix/HelixDataAccessor.java +++ b/helix-core/src/main/java/org/apache/helix/HelixDataAccessor.java @@ -24,6 +24,7 @@ import java.util.Map; import org.I0Itec.zkclient.DataUpdater; import org.apache.helix.model.LiveInstance; +import org.apache.helix.model.MaintenanceSignal; import org.apache.helix.model.Message; import org.apache.helix.model.PauseSignal; import org.apache.helix.model.StateModelDefinition; @@ -38,6 +39,7 @@ public interface HelixDataAccessor { boolean createControllerMessage(Message message); boolean createControllerLeader(LiveInstance leader); boolean createPause(PauseSignal pauseSignal); + boolean 
createMaintenance(MaintenanceSignal maintenanceSignal); /** * Set a property, overwrite if it exists and creates if not exists. This api http://git-wip-us.apache.org/repos/asf/helix/blob/a7477c3b/helix-core/src/main/java/org/apache/helix/PropertyKey.java ---------------------------------------------------------------------- diff --git a/helix-core/src/main/java/org/apache/helix/PropertyKey.java b/helix-core/src/main/java/org/apache/helix/PropertyKey.java index f2d1e7b..4e58a89 100644 --- a/helix-core/src/main/java/org/apache/helix/PropertyKey.java +++ b/helix-core/src/main/java/org/apache/helix/PropertyKey.java @@ -32,6 +32,7 @@ import org.apache.helix.model.IdealState; import org.apache.helix.model.InstanceConfig; import org.apache.helix.model.LeaderHistory; import org.apache.helix.model.LiveInstance; +import org.apache.helix.model.MaintenanceSignal; import org.apache.helix.model.Message; import org.apache.helix.model.ParticipantHistory; import org.apache.helix.model.PauseSignal; @@ -674,6 +675,14 @@ public class PropertyKey { } /** + * Get a property key associated with {@link MaintenanceSignal} + * @return {@link PropertyKey} + */ + public PropertyKey maintenance() { + return new PropertyKey(MAINTENANCE, MaintenanceSignal.class, _clusterName); + } + + /** * Get a property key associated with a {@link HealthStat} for an instance * @param instanceName * @param id identifies the statistics http://git-wip-us.apache.org/repos/asf/helix/blob/a7477c3b/helix-core/src/main/java/org/apache/helix/PropertyPathBuilder.java ---------------------------------------------------------------------- diff --git a/helix-core/src/main/java/org/apache/helix/PropertyPathBuilder.java b/helix-core/src/main/java/org/apache/helix/PropertyPathBuilder.java index a61519a..cfff5bb 100644 --- a/helix-core/src/main/java/org/apache/helix/PropertyPathBuilder.java +++ b/helix-core/src/main/java/org/apache/helix/PropertyPathBuilder.java @@ -19,17 +19,6 @@ package org.apache.helix; * under the 
License. */ -import static org.apache.helix.PropertyType.CONFIGS; -import static org.apache.helix.PropertyType.CURRENTSTATES; -import static org.apache.helix.PropertyType.EXTERNALVIEW; -import static org.apache.helix.PropertyType.HISTORY; -import static org.apache.helix.PropertyType.IDEALSTATES; -import static org.apache.helix.PropertyType.LIVEINSTANCES; -import static org.apache.helix.PropertyType.MESSAGES; -import static org.apache.helix.PropertyType.PAUSE; -import static org.apache.helix.PropertyType.STATEMODELDEFS; -import static org.apache.helix.PropertyType.STATUSUPDATES; - import java.util.Arrays; import java.util.HashMap; import java.util.Map; @@ -42,6 +31,7 @@ import org.apache.helix.model.IdealState; import org.apache.helix.model.InstanceConfig; import org.apache.helix.model.LeaderHistory; import org.apache.helix.model.LiveInstance; +import org.apache.helix.model.MaintenanceSignal; import org.apache.helix.model.Message; import org.apache.helix.model.PauseSignal; import org.apache.helix.model.StateModelDefinition; @@ -49,6 +39,8 @@ import org.apache.helix.model.StatusUpdate; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static org.apache.helix.PropertyType.*; + /** * Utility mapping properties to their Zookeeper locations */ @@ -70,6 +62,7 @@ public class PropertyPathBuilder { typeToClassMapping.put(STATUSUPDATES, StatusUpdate.class); typeToClassMapping.put(HISTORY, LeaderHistory.class); typeToClassMapping.put(PAUSE, PauseSignal.class); + typeToClassMapping.put(MAINTENANCE, MaintenanceSignal.class); // @formatter:off addEntry(PropertyType.CONFIGS, 1, "/{clusterName}/CONFIGS"); @@ -132,6 +125,7 @@ public class PropertyPathBuilder { addEntry(PropertyType.LEADER, 1, "/{clusterName}/CONTROLLER/LEADER"); addEntry(PropertyType.HISTORY, 1, "/{clusterName}/CONTROLLER/HISTORY"); addEntry(PropertyType.PAUSE, 1, "/{clusterName}/CONTROLLER/PAUSE"); + addEntry(PropertyType.MAINTENANCE, 1, "/{clusterName}/CONTROLLER/MAINTENANCE"); // @formatter:on } 
@@ -354,4 +348,8 @@ public class PropertyPathBuilder { public static String pause(String clusterName) { return String.format("/%s/CONTROLLER/PAUSE", clusterName); } + + public static String maintenance(String clusterName) { + return String.format("/%s/CONTROLLER/MAINTENANCE", clusterName); + } } http://git-wip-us.apache.org/repos/asf/helix/blob/a7477c3b/helix-core/src/main/java/org/apache/helix/PropertyType.java ---------------------------------------------------------------------- diff --git a/helix-core/src/main/java/org/apache/helix/PropertyType.java b/helix-core/src/main/java/org/apache/helix/PropertyType.java index b522014..73da39e 100644 --- a/helix-core/src/main/java/org/apache/helix/PropertyType.java +++ b/helix-core/src/main/java/org/apache/helix/PropertyType.java @@ -58,6 +58,7 @@ public enum PropertyType { LEADER(Type.CONTROLLER, false, false, true, true), HISTORY(Type.CONTROLLER, true, true, true), PAUSE(Type.CONTROLLER, true, false, true), + MAINTENANCE(Type.CONTROLLER, true, false, true), MESSAGES_CONTROLLER(Type.CONTROLLER, true, false, true), STATUSUPDATES_CONTROLLER(Type.CONTROLLER, true, true, true), ERRORS_CONTROLLER(Type.CONTROLLER, true, true, true); http://git-wip-us.apache.org/repos/asf/helix/blob/a7477c3b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/AbstractRebalancer.java ---------------------------------------------------------------------- diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/AbstractRebalancer.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/AbstractRebalancer.java index b1888d1..5d77eb3 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/AbstractRebalancer.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/AbstractRebalancer.java @@ -314,7 +314,7 @@ public abstract class AbstractRebalancer implements Rebalancer, MappingCalculato * Sorter for nodes that sorts according to the CurrentState of the partition. 
There are only two priorities: * (1) Top-state and second states have priority 0. (2) Other states(or no state) have priority 1. */ - private static class TopStatePreferenceListComparator implements Comparator<String> { + protected static class TopStatePreferenceListComparator implements Comparator<String> { protected final Map<String, String> _currentStateMap; protected final StateModelDefinition _stateModelDef; @@ -345,4 +345,37 @@ public abstract class AbstractRebalancer implements Rebalancer, MappingCalculato return p1 - p2; } } + + /** + * Sorter for nodes that sorts according to the CurrentState of the partition, based on the state priority defined + * in the state model definition. + * If the CurrentState doesn't exist, treat it as having lowest priority(Integer.MAX_VALUE). + */ + protected static class PreferenceListNodeComparator implements Comparator<String> { + protected final Map<String, String> _currentStateMap; + protected final StateModelDefinition _stateModelDef; + + public PreferenceListNodeComparator(Map<String, String> currentStateMap, StateModelDefinition stateModelDef) { + _currentStateMap = currentStateMap; + _stateModelDef = stateModelDef; + } + + @Override + public int compare(String ins1, String ins2) { + Integer p1 = Integer.MAX_VALUE; + Integer p2 = Integer.MAX_VALUE; + + Map<String, Integer> statesPriorityMap = _stateModelDef.getStatePriorityMap(); + String state1 = _currentStateMap.get(ins1); + String state2 = _currentStateMap.get(ins2); + if (state1 != null && statesPriorityMap.containsKey(state1)) { + p1 = statesPriorityMap.get(state1); + } + if (state2 != null && statesPriorityMap.containsKey(state2)) { + p2 = statesPriorityMap.get(state2); + } + + return p1.compareTo(p2); + } + } } http://git-wip-us.apache.org/repos/asf/helix/blob/a7477c3b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/DelayedAutoRebalancer.java ---------------------------------------------------------------------- diff --git 
a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/DelayedAutoRebalancer.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/DelayedAutoRebalancer.java index adac235..5ebb57d 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/DelayedAutoRebalancer.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/DelayedAutoRebalancer.java @@ -529,38 +529,4 @@ public class DelayedAutoRebalancer extends AbstractRebalancer { return true; } - - - /** - * Sorter for nodes that sorts according to the CurrentState of the partition, based on the state priority defined - * in the state model definition. - * If the CurrentState doesn't exist, treat it as having lowest priority(Integer.MAX_VALUE). - */ - private static class PreferenceListNodeComparator implements Comparator<String> { - protected final Map<String, String> _currentStateMap; - protected final StateModelDefinition _stateModelDef; - - public PreferenceListNodeComparator(Map<String, String> currentStateMap, StateModelDefinition stateModelDef) { - _currentStateMap = currentStateMap; - _stateModelDef = stateModelDef; - } - - @Override - public int compare(String ins1, String ins2) { - Integer p1 = Integer.MAX_VALUE; - Integer p2 = Integer.MAX_VALUE; - - Map<String, Integer> statesPriorityMap = _stateModelDef.getStatePriorityMap(); - String state1 = _currentStateMap.get(ins1); - String state2 = _currentStateMap.get(ins2); - if (state1 != null && statesPriorityMap.containsKey(state1)) { - p1 = statesPriorityMap.get(state1); - } - if (state2 != null && statesPriorityMap.containsKey(state2)) { - p2 = statesPriorityMap.get(state2); - } - - return p1.compareTo(p2); - } - } } http://git-wip-us.apache.org/repos/asf/helix/blob/a7477c3b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/MaintenanceRebalancer.java ---------------------------------------------------------------------- diff --git 
a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/MaintenanceRebalancer.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/MaintenanceRebalancer.java new file mode 100644 index 0000000..d324659 --- /dev/null +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/MaintenanceRebalancer.java @@ -0,0 +1,43 @@ +package org.apache.helix.controller.rebalancer; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import org.apache.helix.controller.stages.ClusterDataCache; +import org.apache.helix.controller.stages.CurrentStateOutput; +import org.apache.helix.model.IdealState; +import org.apache.helix.model.Partition; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class MaintenanceRebalancer extends SemiAutoRebalancer { + private static final Logger LOG = LoggerFactory.getLogger(MaintenanceRebalancer.class); + + @Override + public IdealState computeNewIdealState(String resourceName, IdealState currentIdealState, + CurrentStateOutput currentStateOutput, ClusterDataCache clusterData) { + LOG.info(String + .format("Start computing ideal state for resource %s in maintenance mode.", resourceName)); + Map<Partition, Map<String, String>> currentStateMap = + currentStateOutput.getCurrentStateMap(resourceName); + if (currentStateMap == null || currentStateMap.size() == 0) { + LOG.warn(String + .format("No new partition will be assigned for %s in maintenance mode", resourceName)); + currentIdealState.setPreferenceLists(Collections.EMPTY_MAP); + return currentIdealState; + } + + // One principal is to prohibit DROP -> OFFLINE and OFFLINE -> DROP state transitions. 
+ // Derived preference list from current state with state priority + for (Partition partition : currentStateMap.keySet()) { + Map<String, String> stateMap = currentStateMap.get(partition); + List<String> preferenceList = new ArrayList<>(stateMap.keySet()); + Collections.sort(preferenceList, new PreferenceListNodeComparator(stateMap, + clusterData.getStateModelDef(currentIdealState.getStateModelDefRef()))); + currentIdealState.setPreferenceList(partition.getPartitionName(), preferenceList); + } + LOG.info("End computing ideal state for resource %s in maintenance mode."); + return currentIdealState; + } +} http://git-wip-us.apache.org/repos/asf/helix/blob/a7477c3b/helix-core/src/main/java/org/apache/helix/controller/stages/BestPossibleStateCalcStage.java ---------------------------------------------------------------------- diff --git a/helix-core/src/main/java/org/apache/helix/controller/stages/BestPossibleStateCalcStage.java b/helix-core/src/main/java/org/apache/helix/controller/stages/BestPossibleStateCalcStage.java index c43b96c..4fb8cd7 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/stages/BestPossibleStateCalcStage.java +++ b/helix-core/src/main/java/org/apache/helix/controller/stages/BestPossibleStateCalcStage.java @@ -26,6 +26,7 @@ import org.apache.helix.controller.pipeline.AbstractBaseStage; import org.apache.helix.controller.pipeline.StageException; import org.apache.helix.controller.rebalancer.AutoRebalancer; import org.apache.helix.controller.rebalancer.CustomRebalancer; +import org.apache.helix.controller.rebalancer.MaintenanceRebalancer; import org.apache.helix.controller.rebalancer.Rebalancer; import org.apache.helix.controller.rebalancer.SemiAutoRebalancer; import org.apache.helix.controller.rebalancer.internal.MappingCalculator; @@ -211,7 +212,8 @@ public class BestPossibleStateCalcStage extends AbstractBaseStage { idealState.setStateModelDefRef(resource.getStateModelDefRef()); } - Rebalancer rebalancer = 
getRebalancer(idealState, resourceName); + Rebalancer rebalancer = + getRebalancer(idealState, resourceName, cache.isMaintenanceModeEnabled()); MappingCalculator mappingCalculator = getMappingCalculator(rebalancer, resourceName); if (rebalancer == null || mappingCalculator == null) { @@ -289,7 +291,8 @@ public class BestPossibleStateCalcStage extends AbstractBaseStage { } } - private Rebalancer getRebalancer(IdealState idealState, String resourceName) { + private Rebalancer getRebalancer(IdealState idealState, String resourceName, + boolean isMaintenanceModeEnabled) { Rebalancer customizedRebalancer = null; String rebalancerClassName = idealState.getRebalancerClassName(); if (rebalancerClassName != null) { @@ -305,10 +308,14 @@ public class BestPossibleStateCalcStage extends AbstractBaseStage { Rebalancer rebalancer = null; switch (idealState.getRebalanceMode()) { case FULL_AUTO: - if (customizedRebalancer != null) { - rebalancer = customizedRebalancer; + if (isMaintenanceModeEnabled) { + rebalancer = new MaintenanceRebalancer(); } else { - rebalancer = new AutoRebalancer(); + if (customizedRebalancer != null) { + rebalancer = customizedRebalancer; + } else { + rebalancer = new AutoRebalancer(); + } } break; case SEMI_AUTO: http://git-wip-us.apache.org/repos/asf/helix/blob/a7477c3b/helix-core/src/main/java/org/apache/helix/controller/stages/ClusterDataCache.java ---------------------------------------------------------------------- diff --git a/helix-core/src/main/java/org/apache/helix/controller/stages/ClusterDataCache.java b/helix-core/src/main/java/org/apache/helix/controller/stages/ClusterDataCache.java index 1dd862d..5b4aa83 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/stages/ClusterDataCache.java +++ b/helix-core/src/main/java/org/apache/helix/controller/stages/ClusterDataCache.java @@ -45,6 +45,7 @@ import org.apache.helix.model.ExternalView; import org.apache.helix.model.IdealState; import org.apache.helix.model.InstanceConfig; import 
org.apache.helix.model.LiveInstance; +import org.apache.helix.model.MaintenanceSignal; import org.apache.helix.model.Message; import org.apache.helix.model.ParticipantHistory; import org.apache.helix.model.ResourceAssignment; @@ -111,6 +112,7 @@ public class ClusterDataCache { boolean _updateInstanceOfflineTime = true; boolean _isTaskCache; + boolean _isMaintenanceModeEnabled; private String _clusterName; @@ -204,6 +206,9 @@ public class ClusterDataCache { LOG.warn("Cluster config is null!"); } + MaintenanceSignal maintenanceSignal = accessor.getProperty(keyBuilder.maintenance()); + _isMaintenanceModeEnabled = (maintenanceSignal != null) ? true : false; + long endTime = System.currentTimeMillis(); LOG.info( "END: ClusterDataCache.refresh() for cluster " + getClusterName() + ", took " + (endTime @@ -1008,6 +1013,10 @@ public class ClusterDataCache { return _isTaskCache; } + public boolean isMaintenanceModeEnabled() { + return _isMaintenanceModeEnabled; + } + /** * toString method to print the entire cluster state */ http://git-wip-us.apache.org/repos/asf/helix/blob/a7477c3b/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java ---------------------------------------------------------------------- diff --git a/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java b/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java index c3fa9e9..1af881e 100644 --- a/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java +++ b/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java @@ -67,6 +67,7 @@ import org.apache.helix.model.IdealState; import org.apache.helix.model.IdealState.RebalanceMode; import org.apache.helix.model.InstanceConfig; import org.apache.helix.model.LiveInstance; +import org.apache.helix.model.MaintenanceSignal; import org.apache.helix.model.Message; import org.apache.helix.model.Message.MessageState; import org.apache.helix.model.Message.MessageType; @@ -326,7 +327,33 @@ public 
class ZKHelixAdmin implements HelixAdmin { if (reason != null) { pauseSignal.setReason(reason); } - accessor.createPause(pauseSignal); + if (!accessor.createPause(pauseSignal)) { + throw new HelixException("Failed to create pause signal"); + } + } + } + + @Override + public void enableMaintenanceMode(String clusterName, boolean enabled) { + enableMaintenanceMode(clusterName, enabled, null); + } + + @Override + public void enableMaintenanceMode(String clusterName, boolean enabled, String reason) { + HelixDataAccessor accessor = + new ZKHelixDataAccessor(clusterName, new ZkBaseDataAccessor<ZNRecord>(_zkClient)); + Builder keyBuilder = accessor.keyBuilder(); + + if (!enabled) { + accessor.removeProperty(keyBuilder.maintenance()); + } else { + MaintenanceSignal maintenanceSignal = new MaintenanceSignal("maintenance"); + if (reason != null) { + maintenanceSignal.setReason(reason); + } + if (!accessor.createMaintenance(maintenanceSignal)) { + throw new HelixException("Failed to create maintenance signal"); + } } } http://git-wip-us.apache.org/repos/asf/helix/blob/a7477c3b/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixDataAccessor.java ---------------------------------------------------------------------- diff --git a/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixDataAccessor.java b/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixDataAccessor.java index c5ce121..eff76f8 100644 --- a/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixDataAccessor.java +++ b/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixDataAccessor.java @@ -43,6 +43,7 @@ import org.apache.helix.ZNRecordAssembler; import org.apache.helix.ZNRecordBucketizer; import org.apache.helix.ZNRecordUpdater; import org.apache.helix.model.LiveInstance; +import org.apache.helix.model.MaintenanceSignal; import org.apache.helix.model.Message; import org.apache.helix.model.PauseSignal; import org.apache.helix.model.StateModelDefinition; @@ -113,6 +114,13 @@ 
public class ZKHelixDataAccessor implements HelixDataAccessor { } @Override + public boolean createMaintenance(MaintenanceSignal maintenanceSignal) { + return _baseDataAccessor + .create(PropertyPathBuilder.maintenance(_clusterName), maintenanceSignal.getRecord(), + AccessOption.PERSISTENT); + } + + @Override public <T extends HelixProperty> boolean setProperty(PropertyKey key, T value) { PropertyType type = key.getType(); if (!value.isValid()) { http://git-wip-us.apache.org/repos/asf/helix/blob/a7477c3b/helix-core/src/main/java/org/apache/helix/model/MaintenanceSignal.java ---------------------------------------------------------------------- diff --git a/helix-core/src/main/java/org/apache/helix/model/MaintenanceSignal.java b/helix-core/src/main/java/org/apache/helix/model/MaintenanceSignal.java new file mode 100644 index 0000000..b678738 --- /dev/null +++ b/helix-core/src/main/java/org/apache/helix/model/MaintenanceSignal.java @@ -0,0 +1,13 @@ +package org.apache.helix.model; + +import org.apache.helix.ZNRecord; + +public class MaintenanceSignal extends PauseSignal { + public MaintenanceSignal(String id) { + super(id); + } + + public MaintenanceSignal(ZNRecord record) { + super(record); + } +} http://git-wip-us.apache.org/repos/asf/helix/blob/a7477c3b/helix-core/src/test/java/org/apache/helix/MockAccessor.java ---------------------------------------------------------------------- diff --git a/helix-core/src/test/java/org/apache/helix/MockAccessor.java b/helix-core/src/test/java/org/apache/helix/MockAccessor.java index d41c2d3..583dbd8 100644 --- a/helix-core/src/test/java/org/apache/helix/MockAccessor.java +++ b/helix-core/src/test/java/org/apache/helix/MockAccessor.java @@ -27,6 +27,7 @@ import org.I0Itec.zkclient.DataUpdater; import org.I0Itec.zkclient.exception.ZkNoNodeException; import org.apache.helix.mock.MockBaseDataAccessor; import org.apache.helix.model.LiveInstance; +import org.apache.helix.model.MaintenanceSignal; import 
org.apache.helix.model.Message; import org.apache.helix.model.PauseSignal; import org.apache.helix.model.StateModelDefinition; @@ -67,6 +68,10 @@ public class MockAccessor implements HelixDataAccessor { return false; } + @Override public boolean createMaintenance(MaintenanceSignal maintenanceSignal) { + return false; + } + @Override public boolean setProperty(PropertyKey key, HelixProperty value) { String path = key.getPath(); _baseDataAccessor.set(path, value.getRecord(), AccessOption.PERSISTENT); http://git-wip-us.apache.org/repos/asf/helix/blob/a7477c3b/helix-core/src/test/java/org/apache/helix/integration/controller/TestClusterMaintenanceMode.java ---------------------------------------------------------------------- diff --git a/helix-core/src/test/java/org/apache/helix/integration/controller/TestClusterMaintenanceMode.java b/helix-core/src/test/java/org/apache/helix/integration/controller/TestClusterMaintenanceMode.java new file mode 100644 index 0000000..3949183 --- /dev/null +++ b/helix-core/src/test/java/org/apache/helix/integration/controller/TestClusterMaintenanceMode.java @@ -0,0 +1,81 @@ +package org.apache.helix.integration.controller; + +import java.util.Map; +import org.apache.helix.integration.manager.MockParticipantManager; +import org.apache.helix.integration.task.TaskTestBase; +import org.apache.helix.integration.task.WorkflowGenerator; +import org.apache.helix.model.ExternalView; +import org.apache.helix.model.IdealState; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + +public class TestClusterMaintenanceMode extends TaskTestBase { + @BeforeClass + public void beforeClass() throws Exception { + _numDbs = 1; + _numNodes = 3; + _numReplicas = 3; + _numParitions = 5; + super.beforeClass(); + } + + @Test + public void testMaintenanceModeAddNewInstance() throws InterruptedException { + _gSetupTool.getClusterManagementTool().enableMaintenanceMode(CLUSTER_NAME, true, "Test"); + 
Thread.sleep(2000); + ExternalView prevExternalView = _gSetupTool.getClusterManagementTool() + .getResourceExternalView(CLUSTER_NAME, WorkflowGenerator.DEFAULT_TGT_DB); + String instanceName = PARTICIPANT_PREFIX + "_" + (_startPort + 10); + _setupTool.addInstanceToCluster(CLUSTER_NAME, instanceName); + MockParticipantManager newInstance = + new MockParticipantManager(ZK_ADDR, CLUSTER_NAME, instanceName); + newInstance.syncStart(); + _gSetupTool.getClusterManagementTool() + .rebalance(CLUSTER_NAME, WorkflowGenerator.DEFAULT_TGT_DB, 3); + Thread.sleep(3000); + ExternalView newExternalView = _gSetupTool.getClusterManagementTool() + .getResourceExternalView(CLUSTER_NAME, WorkflowGenerator.DEFAULT_TGT_DB); + Assert.assertEquals(prevExternalView.getRecord().getMapFields(), + newExternalView.getRecord().getMapFields()); + } + + @Test (dependsOnMethods = "testMaintenanceModeAddNewInstance") + public void testMaintenanceModeAddNewResource() throws InterruptedException { + _gSetupTool.getClusterManagementTool() + .addResource(CLUSTER_NAME, WorkflowGenerator.DEFAULT_TGT_DB + 1, 7, "MasterSlave", + IdealState.RebalanceMode.FULL_AUTO.name()); + _gSetupTool.getClusterManagementTool() + .rebalance(CLUSTER_NAME, WorkflowGenerator.DEFAULT_TGT_DB + 1, 3); + Thread.sleep(2000); + ExternalView externalView = _gSetupTool.getClusterManagementTool() + .getResourceExternalView(CLUSTER_NAME, WorkflowGenerator.DEFAULT_TGT_DB + 1); + Assert.assertNull(externalView); + } + + @Test (dependsOnMethods = "testMaintenanceModeAddNewResource") + public void testMaintenanceModeInstanceDown() throws InterruptedException { + _participants[0].syncStop(); + Thread.sleep(2000); + ExternalView externalView = _gSetupTool.getClusterManagementTool() + .getResourceExternalView(CLUSTER_NAME, WorkflowGenerator.DEFAULT_TGT_DB); + for (Map<String, String> stateMap : externalView.getRecord().getMapFields().values()) { + Assert.assertTrue(stateMap.values().contains("MASTER")); + } + } + + @Test (dependsOnMethods = 
"testMaintenanceModeInstanceDown") + public void testMaintenanceModeInstanceBack() throws InterruptedException { + _participants[0] = + new MockParticipantManager(ZK_ADDR, CLUSTER_NAME, _participants[0].getInstanceName()); + _participants[0].syncStart(); + Thread.sleep(2000); + ExternalView externalView = _gSetupTool.getClusterManagementTool() + .getResourceExternalView(CLUSTER_NAME, WorkflowGenerator.DEFAULT_TGT_DB); + for (Map<String, String> stateMap : externalView.getRecord().getMapFields().values()) { + if (stateMap.containsKey(_participants[0].getInstanceName())) { + Assert.assertTrue(stateMap.get(_participants[0].getInstanceName()).equals("SLAVE")); + } + } + } +} http://git-wip-us.apache.org/repos/asf/helix/blob/a7477c3b/helix-core/src/test/java/org/apache/helix/mock/MockHelixAdmin.java ---------------------------------------------------------------------- diff --git a/helix-core/src/test/java/org/apache/helix/mock/MockHelixAdmin.java b/helix-core/src/test/java/org/apache/helix/mock/MockHelixAdmin.java index 037d92b..b1d5da7 100644 --- a/helix-core/src/test/java/org/apache/helix/mock/MockHelixAdmin.java +++ b/helix-core/src/test/java/org/apache/helix/mock/MockHelixAdmin.java @@ -253,6 +253,14 @@ public class MockHelixAdmin implements HelixAdmin { } + @Override public void enableMaintenanceMode(String clusterName, boolean enabled) { + + } + + @Override public void enableMaintenanceMode(String clusterName, boolean enabled, String reason) { + + } + @Override public void resetPartition(String clusterName, String instanceName, String resourceName, List<String> partitionNames) {
