[ https://issues.apache.org/jira/browse/YARN-6163?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15866937#comment-15866937 ]
ASF GitHub Bot commented on YARN-6163:
--------------------------------------
Github user templedf commented on a diff in the pull request:
https://github.com/apache/hadoop/pull/192#discussion_r101171862
--- Diff: hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/VisitedResourceRequestTracker.java ---
@@ -0,0 +1,124 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair;
+
+import org.apache.hadoop.yarn.api.records.Priority;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.api.records.ResourceRequest;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ClusterNodeTracker;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * Applications place {@link ResourceRequest}s at multiple levels. This is a
+ * helper class that allows tracking if a {@link ResourceRequest} has been
+ * visited at a different locality level.
+ *
+ * This is implemented for {@link FSAppAttempt#getStarvedResourceRequests()}.
+ * The implementation is not thread-safe.
+ */
+class VisitedResourceRequestTracker {
+ private final Map<Priority, Map<Resource, TrackerPerPriorityResource>> map =
+ new HashMap<>();
+ private final ClusterNodeTracker<FSSchedulerNode> nodeTracker;
+
+ VisitedResourceRequestTracker(
+ ClusterNodeTracker<FSSchedulerNode> nodeTracker) {
+ this.nodeTracker = nodeTracker;
+ }
+
+ /**
+ * Check if the {@link ResourceRequest} is visited before, and track it.
+ * @param rr {@link ResourceRequest} to visit
+ * @return true if <code>rr</code> this is the first visit across all
+ * locality levels, false otherwise
+ */
+ boolean visit(ResourceRequest rr) {
+ Priority priority = rr.getPriority();
+ Resource capability = rr.getCapability();
+
+ Map<Resource, TrackerPerPriorityResource> subMap = map.get(priority);
+ if (subMap == null) {
+ subMap = new HashMap<>();
+ map.put(priority, subMap);
+ }
+
+ TrackerPerPriorityResource tracker = subMap.get(capability);
+ if (tracker == null) {
+ tracker = new TrackerPerPriorityResource();
+ subMap.put(capability, tracker);
+ }
+
+ return tracker.visit(rr.getResourceName());
+ }
+
+ private class TrackerPerPriorityResource {
+ private Set<String> racksWithNodesVisited = new HashSet<>();
+ private Set<String> racksVisted = new HashSet<>();
+ private boolean anyVisited;
+
+ private boolean visitAny() {
+ if (racksVisted.isEmpty() && racksWithNodesVisited.isEmpty()) {
+ anyVisited = true;
+ }
+ return anyVisited;
+ }
+
+ private boolean visitRack(String rackName) {
+ if (anyVisited || racksWithNodesVisited.contains(rackName)) {
+ return false;
+ } else {
+ racksVisted.add(rackName);
+ return true;
+ }
+ }
+
+ private boolean visitNode(String rackName) {
+ if (anyVisited || racksVisted.contains(rackName)) {
+ return false;
+ } else {
+ racksWithNodesVisited.add(rackName);
+ return true;
+ }
+ }
+
+ private boolean visit(String resourceName) {
+ if (resourceName.equals(ResourceRequest.ANY)) {
+ return visitAny();
+ }
+
+ List<FSSchedulerNode> nodes =
+ nodeTracker.getNodesByResourceName(resourceName);
+ switch (nodes.size()) {
--- End diff --
I don't love this as a switch.
> FS Preemption is a trickle for severely starved applications
> ------------------------------------------------------------
>
> Key: YARN-6163
> URL: https://issues.apache.org/jira/browse/YARN-6163
> Project: Hadoop YARN
> Issue Type: Sub-task
> Components: fairscheduler
> Affects Versions: 2.9.0
> Reporter: Karthik Kambatla
> Assignee: Karthik Kambatla
> Attachments: yarn-6163-1.patch, yarn-6163-2.patch
>
>
> With current logic, only one RR is considered per each instance of marking an
> application starved. This marking happens only on the update call that runs
> every 500ms. Due to this, an application that is severely starved takes
> forever to reach fairshare based on preemptions.
--
This message was sent by Atlassian JIRA
(v6.3.15#6346)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]