Ngone51 commented on code in PR #36162:
URL: https://github.com/apache/spark/pull/36162#discussion_r903830839
##########
core/src/main/scala/org/apache/spark/internal/config/package.scala:
##########
@@ -2073,6 +2073,41 @@ package object config {
.timeConf(TimeUnit.MILLISECONDS)
.createOptional
+  private[spark] val SPECULATION_EFFICIENCY_TASK_PROCESS_RATE_MULTIPLIER =
+    ConfigBuilder("spark.speculation.efficiency.processRateMultiplier")
+      .doc("A multiplier used when evaluating inefficient tasks. The higher the multiplier " +
+        "is, the more tasks will possibly be considered inefficient.")
+      .version("3.4.0")
+      .doubleConf
+      .checkValue(v => v > 0.0 && v <= 1.0, "multiplier must be in (0.0, 1.0]")
+      .createWithDefault(0.75)
+
+  private[spark] val SPECULATION_EFFICIENCY_TASK_DURATION_FACTOR =
+    ConfigBuilder("spark.speculation.efficiency.longRunTaskFactor")
+      .doc(s"A task will be speculated anyway as long as its duration has exceeded the value of " +
+        s"multiplying the factor and the time threshold (either be ${SPECULATION_MULTIPLIER.key} " +
+        s"* successfulTaskDurations.median or ${SPECULATION_MIN_THRESHOLD.key}) regardless of " +
+        s"it's efficient or not. This avoids missing the tasks when task slow isn't due to data" +
Review Comment:
nit: "...regardless of it's data process rate is good. This avoids missing
the in efficient tasks when task slow isn't related to..."
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]