dongjoon-hyun commented on a change in pull request #35373:
URL: https://github.com/apache/spark/pull/35373#discussion_r796141733
##########
File path:
resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorRollPlugin.scala
##########
@@ -117,29 +117,36 @@ class ExecutorRollDriverPlugin extends DriverPlugin with
Logging {
listWithoutDriver.sortBy(e => e.totalDuration.toFloat / Math.max(1,
e.totalTasks)).reverse
case ExecutorRollPolicy.FAILED_TASKS =>
listWithoutDriver.sortBy(_.failedTasks).reverse
- case ExecutorRollPolicy.OUTLIER =>
- // We build multiple outlier lists and concat in the following
importance order to find
- // outliers in various perspective:
- // AVERAGE_DURATION > TOTAL_DURATION > TOTAL_GC_TIME > FAILED_TASKS
- // Since we will choose only first item, the duplication is okay. If
there is no outlier,
- // We fallback to TOTAL_DURATION policy.
- outliers(listWithoutDriver.filter(_.totalTasks > 0), e =>
e.totalDuration / e.totalTasks) ++
- outliers(listWithoutDriver, e => e.totalDuration) ++
- outliers(listWithoutDriver, e => e.totalGCTime) ++
- outliers(listWithoutDriver, e => e.failedTasks) ++
- listWithoutDriver.sortBy(_.totalDuration).reverse
+ case ExecutorRollPolicy.OUTLIER => outliers(listWithoutDriver)
+ case ExecutorRollPolicy.OUTLIER_OR_TOTAL_DURATION =>
+ // If there is no outlier we fallback to TOTAL_DURATION policy.
+ outliers(listWithoutDriver) ++
listWithoutDriver.sortBy(_.totalDuration).reverse
}
sortedList.headOption.map(_.id)
}
+ /**
+ * We build multiple outlier lists and concat in the following importance
order to find
+ * outliers in various perspective:
+ * AVERAGE_DURATION > TOTAL_DURATION > TOTAL_GC_TIME > FAILED_TASKS
+ * Since we will choose only first item, the duplication is okay.
+ */
+ private def outliers(listWithoutDriver: Seq[v1.ExecutorSummary]) =
+ outliersForFn(
+ listWithoutDriver.filter(_.totalTasks > 0),
+ e => e.totalDuration / e.totalTasks) ++
+ outliersForFn(listWithoutDriver, e => e.totalDuration) ++
+ outliersForFn(listWithoutDriver, e => e.totalGCTime) ++
+ outliersForFn(listWithoutDriver, e => e.failedTasks)
+
/**
* Return executors whose metrics is outstanding, '(value - mean) >
2-sigma'. This is
* a best-effort approach because the snapshot of ExecutorSummary is not a
normal distribution.
* Outliers can be defined in several ways
(https://en.wikipedia.org/wiki/Outlier).
* Here, we borrowed 2-sigma idea from
https://en.wikipedia.org/wiki/68-95-99.7_rule.
* In case of normal distribution, this is known to be 2.5 percent roughly.
*/
- private def outliers(
+ private def outliersForFn(
Review comment:
Let's keep the original method name (`outliers`) for this method, because it
focuses on finding outliers in a single dimension.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]