[
https://issues.apache.org/jira/browse/YARN-6163?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15868371#comment-15868371
]
ASF GitHub Bot commented on YARN-6163:
--------------------------------------
Github user templedf commented on a diff in the pull request:
https://github.com/apache/hadoop/pull/192#discussion_r101355213
--- Diff:
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java
---
@@ -1111,6 +1114,81 @@ boolean isStarvedForFairShare() {
return !Resources.isNone(fairshareStarvation);
}
+ /**
+ * Fetch a list of RRs corresponding to the extent the app is starved
+ * (fairshare and minshare). This method considers the number of containers
+ * in a RR and also only one locality-level (the first encountered
+ * resourceName).
+ *
+ * @return list of {@link ResourceRequest}s corresponding to the amount of
+ * starvation.
+ */
+ List<ResourceRequest> getStarvedResourceRequests() {
+ // List of RRs we build in this method to return
+ List<ResourceRequest> ret = new ArrayList<>();
+
+ // Track visited RRs to avoid the same RR at multiple locality levels
+ VisitedResourceRequestTracker visitedRRs =
+ new VisitedResourceRequestTracker(scheduler.getNodeTracker());
+
+ // Start with current starvation and track the pending amount
+ Resource pending = getStarvation();
+ for (ResourceRequest rr : appSchedulingInfo.getAllResourceRequests()) {
+ if (Resources.isNone(pending)) {
+ // Found enough RRs to match the starvation
+ break;
+ }
+
+ // See if we have already seen this RR
+ if (!visitedRRs.visit(rr)) {
+ continue;
+ }
+
+ // A RR can have multiple containers of a capability. We need to
+ // compute the number of containers that fit in "pending".
+ int numContainersThatFit = (int) Math.floor(
+ Resources.ratio(scheduler.getResourceCalculator(),
+ pending, rr.getCapability()));
+ if (numContainersThatFit == 0) {
+ // This RR's capability is too large to fit in pending
+ continue;
+ }
+
+ // If the RR is only partially being satisfied, include only the
+ // partial number of containers.
+ if (numContainersThatFit < rr.getNumContainers()) {
+ rr = ResourceRequest.newInstance(rr.getPriority(),
+ rr.getResourceName(), rr.getCapability(), numContainersThatFit);
+ }
+
+ // Add the RR to return list and adjust "pending" accordingly
+ ret.add(rr);
+ Resources.subtractFromNonNegative(pending,
+ Resources.multiply(rr.getCapability(), rr.getNumContainers()));
+ }
+
+ return ret;
+ }
+
+ /**
+ * Notify this app that preemption has been triggered to make room for
+ * outstanding demand. The app should not be considered starved until after
+ * the specified delay.
+ *
+ * @param delayBeforeNextStarvationCheck duration to wait
+ */
+ void preemptionTriggered(long delayBeforeNextStarvationCheck) {
+ nextStarvationCheck =
+ scheduler.getClock().getTime() + delayBeforeNextStarvationCheck;
+ }
+
+ /**
+ * Whether this app's starvation should be considered.
--- End diff --
Missing @return here, too.
> FS Preemption is a trickle for severely starved applications
> ------------------------------------------------------------
>
> Key: YARN-6163
> URL: https://issues.apache.org/jira/browse/YARN-6163
> Project: Hadoop YARN
> Issue Type: Sub-task
> Components: fairscheduler
> Affects Versions: 2.9.0
> Reporter: Karthik Kambatla
> Assignee: Karthik Kambatla
> Attachments: YARN-6163.004.patch, YARN-6163.005.patch,
> yarn-6163-1.patch, yarn-6163-2.patch
>
>
> With current logic, only one RR is considered per each instance of marking an
> application starved. This marking happens only on the update call that runs
> every 500ms. Due to this, an application that is severely starved takes
> forever to reach fairshare based on preemptions.
--
This message was sent by Atlassian JIRA
(v6.3.15#6346)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]