jiajunwang commented on a change in pull request #362: The WAGED rebalancer cluster model implementation URL: https://github.com/apache/helix/pull/362#discussion_r310284240
########## File path: helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/model/AssignableNode.java ########## @@ -19,10 +19,290 @@ * under the License. */ +import org.apache.helix.HelixException; +import org.apache.helix.controller.dataproviders.ResourceControllerDataProvider; +import org.apache.helix.model.ClusterConfig; +import org.apache.helix.model.InstanceConfig; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import static java.lang.Math.max; + /** - * A placeholder before we have the implementation. - * - * This class represents a potential allocation of the replication. - * Note that AssignableNode is not thread safe. + * This class represents a possible allocation of the replication. + * Note that any usage updates to the AssignableNode are not thread safe. */ -public class AssignableNode { } +public class AssignableNode { + private static final Logger _logger = LoggerFactory.getLogger(AssignableNode.class.getName()); + + // proposed assignment tracking + // <resource name, partition name> + private Map<String, Set<String>> _currentAssignments; + // <resource name, top state partition name> + private Map<String, Set<String>> _currentTopStateAssignments; + // <capacity key, capacity value> + private Map<String, Integer> _currentCapacity; + // runtime usage tracking + private int _totalReplicaAssignmentCount; + private float _highestCapacityUtilization; + + // basic node information + private final String _instanceName; + private Set<String> _instanceTags; + private String _faultZone; + private Map<String, List<String>> _disabledPartitionsMap; + private Map<String, Integer> _maxCapacity; + private int _maxPartition; + + AssignableNode(ResourceControllerDataProvider clusterCache, String instanceName, + Collection<AssignableReplica> 
existingAssignment) { + _instanceName = instanceName; + refresh(clusterCache, existingAssignment); + } + + private void reset() { + _currentAssignments = new HashMap<>(); + _currentTopStateAssignments = new HashMap<>(); + _currentCapacity = new HashMap<>(); + _totalReplicaAssignmentCount = 0; + _highestCapacityUtilization = 0; + } + + /** + * Update the node with a ClusterDataCache. This resets the current assignment and recalculate currentCapacity. + * NOTE: While this is required to be used in the constructor, this can also be used when the clusterCache needs to be + * refreshed. This is under the assumption that the capacity mappings of InstanceConfig and ResourceConfig could + * subject to changes. If the assumption is no longer true, this function should become private. + * + * @param clusterCache - the current cluster cache to initial the AssignableNode. + */ + private void refresh(ResourceControllerDataProvider clusterCache, Review comment: Some background here. We want to have a cluster data cache snapshot based on DataProvider. And that should have been used here. The snapshot is immutable. Unfortunately, we don't have this class implemented yet. This provider is the closest thing we can use for now. We will need to refactor the usage of the data provider everywhere once the snapshot is done. This is planned in the scope of controller improvement. About the second point, the data provider does not know anything about the data model. This is the current situation. I guess you are saying that the data model shall not rely on the data provider's methods, right? This is a valid call, let me try to change the refresh method parameters. But still, a builder is too complicated for now. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services