[ https://issues.apache.org/jira/browse/YARN-11736?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17890857#comment-17890857 ]
ASF GitHub Bot commented on YARN-11736: --------------------------------------- TaoYang526 commented on code in PR #7121: URL: https://github.com/apache/hadoop/pull/7121#discussion_r1806074629 ########## hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/placement/policy/MultiComparatorPolicy.java: ########## @@ -0,0 +1,346 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.yarn.server.resourcemanager.scheduler.placement.policy; + +import org.apache.hadoop.classification.VisibleForTesting; +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.yarn.exceptions.ConfigurationException; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.placement.MultiNodeLookupPolicy; +import org.apache.hadoop.yarn.util.resource.DominantResourceCalculator; +import org.apache.hadoop.yarn.util.resource.Resources; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.function.Function; +import java.util.stream.Collectors; + +import static org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration.DOT; + +/** + * <p> + * This class has the following functionality: + * + * <p> + * MultiComparatorPolicy + * - manages some common comparators to help sorting nodes by + * allocated/unallocated/total resource, dominant ratio, etc. + * - holds sorted nodes list based on the of nodes at given time. + * - can be configured with specified comparators. 
+ * </p> + */ +public class MultiComparatorPolicy<N extends SchedulerNode> + implements MultiNodeLookupPolicy<N>, Configurable { + + private static final Logger LOG = + LoggerFactory.getLogger(MultiComparatorPolicy.class); + // comparators + private static final DominantResourceCalculator DOMINANT_RC = + new DominantResourceCalculator(); + private static final Map<ComparatorKey, Function<SchedulerNode, Comparable>> + COMPARATOR_CALCULATORS = Collections.unmodifiableMap( + new HashMap<ComparatorKey, Function<SchedulerNode, Comparable>>() {{ + // for vcores + put(ComparatorKey.ALLOCATED_VCORES, + obj -> obj.getAllocatedResource().getVirtualCores()); + put(ComparatorKey.UNALLOCATED_VCORES, + obj -> obj.getUnallocatedResource().getVirtualCores()); + put(ComparatorKey.TOTAL_VCORES, + obj -> obj.getTotalResource().getVirtualCores()); + // for memory + put(ComparatorKey.ALLOCATED_MEMORY, + obj -> obj.getAllocatedResource().getMemorySize()); + put(ComparatorKey.UNALLOCATED_MEMORY, + obj -> obj.getUnallocatedResource().getMemorySize()); + put(ComparatorKey.TOTAL_MEMORY, + obj -> obj.getTotalResource().getMemorySize()); + // for resource + put(ComparatorKey.ALLOCATED_RESOURCE, + SchedulerNode::getAllocatedResource); + put(ComparatorKey.UNALLOCATED_RESOURCE, + SchedulerNode::getUnallocatedResource); + put(ComparatorKey.TOTAL_RESOURCE, + SchedulerNode::getTotalResource); + // for dominant ratio + put(ComparatorKey.DOMINANT_ALLOCATED_RATIO, obj -> Resources + .ratio(DOMINANT_RC, obj.getAllocatedResource(), + obj.getTotalResource())); + // for node ID + put(ComparatorKey.NODE_ID, SchedulerNode::getNodeID); + }}); + // conf keys and default values + public static final String COMPARATORS_CONF_KEY = "comparators"; + protected static final List<Comparator> DEFAULT_COMPARATORS = Collections + .unmodifiableList(Arrays.asList( + new Comparator(ComparatorKey.DOMINANT_ALLOCATED_RATIO, + OrderDirection.ASC, COMPARATOR_CALCULATORS + .get(ComparatorKey.DOMINANT_ALLOCATED_RATIO)), + new 
Comparator(ComparatorKey.NODE_ID, OrderDirection.ASC, + COMPARATOR_CALCULATORS.get(ComparatorKey.NODE_ID)))); + + protected Map<String, Set<N>> nodesPerPartition = new ConcurrentHashMap<>(); + protected List<Comparator> comparators; + private Configuration conf; + + public MultiComparatorPolicy() { + } + + @Override + public void setConf(Configuration conf) { + // init comparators + this.comparators = DEFAULT_COMPARATORS; + if (conf == null) { + return; + } + this.conf = conf; + String policyName = conf.get( + CapacitySchedulerConfiguration.MULTI_NODE_SORTING_POLICY_CURRENT_NAME); Review Comment: Currently, `MULTI_NODE_SORTING_POLICY_NAME` is the prefix of all the configurations of the multi-node policy: `yarn.scheduler.capacity.multi-node-sorting.policy`. It's not a proper name, but it is used in many places, so I prefer not to update it. `MULTI_NODE_SORTING_POLICY_CURRENT_NAME` is used to pass the policyName to the policy instance. > Enhance MultiNodeLookupPolicy to allow configuration of extended comparators > for better usability. > -------------------------------------------------------------------------------------------------- > > Key: YARN-11736 > URL: https://issues.apache.org/jira/browse/YARN-11736 > Project: Hadoop YARN > Issue Type: Improvement > Components: multi-node-placement > Reporter: Tao Yang > Assignee: Tao Yang > Priority: Major > Labels: pull-request-available > > Currently, when multi-node placement is enabled, there is only one implementation of the > _MultiNodeLookupPolicy_ interface: {_}ResourceUsageMultiNodeLookupPolicy{_}, > which will sort nodes by allocated resources in ascending order. > If a cluster has nodes with different resource specs, the resource utilization > of smaller nodes will be significantly higher, while larger nodes will > experience low resource utilization. This may raise the hotspot risk and > reduce the scheduling effectiveness. 
> So I propose to add a new policy called _MultiComparatorPolicy_ to meet > requirements from some complex scenarios, which should contain several > built-in comparators and can be extended later, and supports configuring > specified comparators for different policy instances. > > {*}Implementation Details{*}: > 1. _MultiNodeSorter#initPolicy_ will pass the policyConf, which is cloned from > the scheduler configuration and has the name of the current policy attached, so that we > can fetch the specified configuration for this policy inside the > implementations of MultiNodeLookupPolicy. > 2. new implementation of {_}MultiNodeLookupPolicy{_}: _MultiComparatorPolicy_ > 2.1) contains several built-in comparators and can be extended later. > comparator keys: _ALLOCATED_RESOURCE / UNALLOCATED_RESOURCE / > DOMINANT_ALLOCATED_RATIO / NODE_ID,_ order-directions: _ASC / DESC._ > 2.2) supports configuring specified comparators with > order-direction(ASC/DESC) for different policy instances via conf-key: > {+}{color:#172b4d}yarn.scheduler.capacity.multi-node-sorting.policy.<policy-name>.comparators{color}{+}, > value format is > "<comparator_key_1>[:<order_direction_1>],<comparator_key_2>[:<order_direction_2>],...". > For example, "DOMINANT_ALLOCATED_RATIO,NODE_ID:DESC" means that for a policy > named test, nodes should be sorted by dominant allocated ratio in ascending order, > then by nodeID in descending order. > 3. Refactor variable names in > AbstractCSQueue/CSQueue/FiCaSchedulerApp/AppPlacementAllocator after > supporting multiple policy instances with the same policy class. -- This message was sent by Atlassian Jira (v8.20.10#820010) --------------------------------------------------------------------- To unsubscribe, e-mail: yarn-issues-unsubscr...@hadoop.apache.org For additional commands, e-mail: yarn-issues-h...@hadoop.apache.org