Hot fix to reduce the chance of the flip-flop issue in the legacy/default AutoRebalanceStrategy.
Project: http://git-wip-us.apache.org/repos/asf/helix/repo Commit: http://git-wip-us.apache.org/repos/asf/helix/commit/ae13411c Tree: http://git-wip-us.apache.org/repos/asf/helix/tree/ae13411c Diff: http://git-wip-us.apache.org/repos/asf/helix/diff/ae13411c Branch: refs/heads/master Commit: ae13411c8dd6fcf532d07c0d525eb717741f7f60 Parents: b1540f0 Author: Lei Xia <[email protected]> Authored: Mon Dec 4 13:40:41 2017 -0800 Committer: Junkai Xue <[email protected]> Committed: Wed Jan 24 18:31:59 2018 -0800 ---------------------------------------------------------------------- .../rebalancer/DelayedAutoRebalancer.java | 22 ++++++++++-- .../controller/stages/ClusterDataCache.java | 38 ++++++++++++++++++++ 2 files changed, 58 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/helix/blob/ae13411c/helix-core/src/main/java/org/apache/helix/controller/rebalancer/DelayedAutoRebalancer.java ---------------------------------------------------------------------- diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/DelayedAutoRebalancer.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/DelayedAutoRebalancer.java index 5ebb57d..7ad2eb2 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/DelayedAutoRebalancer.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/DelayedAutoRebalancer.java @@ -32,6 +32,7 @@ import java.util.Set; import org.apache.helix.HelixDefinedState; import org.apache.helix.ZNRecord; +import org.apache.helix.controller.rebalancer.strategy.AutoRebalanceStrategy; import org.apache.helix.controller.rebalancer.util.RebalanceScheduler; import org.apache.helix.controller.stages.ClusterDataCache; import org.apache.helix.controller.stages.CurrentStateOutput; @@ -57,7 +58,22 @@ public class DelayedAutoRebalancer extends AbstractRebalancer { public IdealState computeNewIdealState(String 
resourceName, IdealState currentIdealState, CurrentStateOutput currentStateOutput, ClusterDataCache clusterData) { - List<String> allPartitions = new ArrayList<String>(currentIdealState.getPartitionSet()); + + // Looking for cached ideal mapping for this resource, if it is already there, do not recompute it again. + // The cached mapping will be cleared in ClusterDataCache if there is anything changed in cluster state that can + // cause the potential changes in ideal state. + // this will avoid flip-flop issue we saw in AutoRebalanceStrategy. + ZNRecord znRecord = clusterData.getCachedIdealMapping(resourceName); + if (znRecord != null) { + // TODO: only apply to legacy Auto-RebalanceStrategy at this time, need to apply to any strategy in future. + if (currentIdealState.getRebalanceStrategy().equals(AutoRebalanceStrategy.class.getName())) { + LOG.info("Use cached idealstate for " + resourceName); + IdealState idealState = new IdealState(znRecord); + return idealState; + } + } + + List<String> allPartitions = new ArrayList<>(currentIdealState.getPartitionSet()); if (allPartitions.size() == 0) { LOG.info("Partition count is 0 for resource " + resourceName + ", stop calculate ideal mapping for the resource."); @@ -181,7 +197,9 @@ public class DelayedAutoRebalancer extends AbstractRebalancer { LOG.debug("finalMapping: " + finalMapping); } - return generateNewIdealState(resourceName, currentIdealState, finalMapping); + IdealState idealState = generateNewIdealState(resourceName, currentIdealState, finalMapping); + clusterData.setCachedIdealMapping(resourceName, idealState.getRecord()); + return idealState; } private IdealState generateNewIdealState(String resourceName, IdealState currentIdealState, http://git-wip-us.apache.org/repos/asf/helix/blob/ae13411c/helix-core/src/main/java/org/apache/helix/controller/stages/ClusterDataCache.java ---------------------------------------------------------------------- diff --git 
a/helix-core/src/main/java/org/apache/helix/controller/stages/ClusterDataCache.java b/helix-core/src/main/java/org/apache/helix/controller/stages/ClusterDataCache.java index 5b4aa83..e663fd4 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/stages/ClusterDataCache.java +++ b/helix-core/src/main/java/org/apache/helix/controller/stages/ClusterDataCache.java @@ -101,8 +101,13 @@ public class ClusterDataCache { private Map<PropertyKey, CurrentState> _currentStateCache = Maps.newHashMap(); // maintain a cache of bestPossible assignment across pipeline runs + // TODO: this is only for customRebalancer, remove it and merge it with _idealMappingCache. private Map<String, ResourceAssignment> _resourceAssignmentCache = Maps.newHashMap(); + + // maintain a cache of idealmapping (preference list) for full-auto resource across pipeline runs + private Map<String, ZNRecord> _idealMappingCache = Maps.newHashMap(); + private Map<ChangeType, Boolean> _propertyDataChangedMap; private Map<String, Integer> _participantActiveTaskCount = new HashMap<>(); @@ -985,8 +990,41 @@ public class ClusterDataCache { } + /** + * Get cached resourceAssignment (ideal mapping) for a resource + * + * @param resource + * + * @return + */ + public ZNRecord getCachedIdealMapping(String resource) { + return _idealMappingCache.get(resource); + } + + /** + * Get cached idealmapping + * + * @return + */ + public Map<String, ZNRecord> getCachedIdealMapping() { + return Collections.unmodifiableMap(_idealMappingCache); + } + + /** + * Cache resourceAssignment (ideal mapping) for a resource + * + * @param resource + * + * @return + */ + public void setCachedIdealMapping(String resource, ZNRecord mapping) { + _idealMappingCache.put(resource, mapping); + } + + public void clearCachedResourceAssignments() { _resourceAssignmentCache.clear(); + _idealMappingCache.clear(); } /**
