[ https://issues.apache.org/jira/browse/YARN-6163?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15862475#comment-15862475 ]
ASF GitHub Bot commented on YARN-6163: -------------------------------------- Github user kambatla commented on a diff in the pull request: https://github.com/apache/hadoop/pull/192#discussion_r100673288 --- Diff: hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java --- @@ -1106,6 +1111,97 @@ boolean isStarvedForFairShare() { return !Resources.isNone(fairshareStarvation); } + /** + * Helper method for {@link #getStarvedResourceRequests()}: + * Given a map of visited {@link ResourceRequest}s, it checks if + * {@link ResourceRequest} 'rr' has already been visited. The map is updated + * to reflect visiting 'rr'. + */ + private static boolean checkAndMarkRRVisited( + Map<Priority, List<Resource>> visitedRRs, ResourceRequest rr) { + Priority priority = rr.getPriority(); + Resource capability = rr.getCapability(); + if (visitedRRs.containsKey(priority)) { + List<Resource> rrList = visitedRRs.get(priority); + if (rrList.contains(capability)) { + return true; + } else { + rrList.add(capability); + return false; + } + } else { + List<Resource> newRRList = new ArrayList<>(); + newRRList.add(capability); + visitedRRs.put(priority, newRRList); + return false; + } + } + + /** + * Fetch a list of RRs corresponding to the extent the app is starved + * (fairshare and minshare). This method considers the number of containers + * in a RR and also only one locality-level (the first encountered + * resourceName). + * + * @return list of {@link ResourceRequest}s corresponding to the amount of + * starvation. 
+ */ + List<ResourceRequest> getStarvedResourceRequests() { + List<ResourceRequest> ret = new ArrayList<>(); + Map<Priority, List<Resource>> visitedRRs= new HashMap<>(); + + Resource pending = getStarvation(); + for (ResourceRequest rr : appSchedulingInfo.getAllResourceRequests()) { + if (Resources.isNone(pending)) { + break; + } + if (checkAndMarkRRVisited(visitedRRs, rr)) { + continue; + } + + // Compute the number of containers of this capability that fit in the + // pending amount + int ratio = (int) Math.floor( + Resources.ratio(scheduler.getResourceCalculator(), + pending, rr.getCapability())); + if (ratio == 0) { + continue; + } + + // If the RR is only partially being satisfied, include only the + // partial number of containers. + if (ratio < rr.getNumContainers()) { + rr = ResourceRequest.newInstance( + rr.getPriority(), rr.getResourceName(), rr.getCapability(), ratio); + } + ret.add(rr); + Resources.subtractFromNonNegative(pending, + Resources.multiply(rr.getCapability(), ratio)); + } + + return ret; + } + + /** + * Notify this app that preemption has been triggered to make room for + * outstanding demand. The app should not be considered starved until after + * the specified delay. + * + * @param delayBeforeNextStarvationCheck duration to wait + */ + void preemptionTriggered(long delayBeforeNextStarvationCheck) { + nextStarvationCheck = + scheduler.getClock().getTime() + delayBeforeNextStarvationCheck; + } + + /** + * Whether this app's starvation should be considered. + */ + boolean shouldCheckForStarvation() { + long now = scheduler.getClock().getTime(); + return now > nextStarvationCheck; --- End diff -- > and >= shouldn't really matter. 
Updated to >= > FS Preemption is a trickle for severely starved applications > ------------------------------------------------------------ > > Key: YARN-6163 > URL: https://issues.apache.org/jira/browse/YARN-6163 > Project: Hadoop YARN > Issue Type: Sub-task > Components: fairscheduler > Affects Versions: 2.9.0 > Reporter: Karthik Kambatla > Assignee: Karthik Kambatla > Attachments: yarn-6163-1.patch > > > With the current logic, only one RR is considered each time an application > is marked starved. This marking happens only on the update call that runs > every 500ms. Because of this, a severely starved application takes > forever to reach its fair share through preemption. -- This message was sent by Atlassian JIRA (v6.3.15#6346) --------------------------------------------------------------------- To unsubscribe, e-mail: yarn-issues-unsubscr...@hadoop.apache.org For additional commands, e-mail: yarn-issues-h...@hadoop.apache.org