Ngone51 commented on code in PR #36162:
URL: https://github.com/apache/spark/pull/36162#discussion_r903830839


##########
core/src/main/scala/org/apache/spark/internal/config/package.scala:
##########
@@ -2073,6 +2073,41 @@ package object config {
       .timeConf(TimeUnit.MILLISECONDS)
       .createOptional
 
+  private[spark] val SPECULATION_EFFICIENCY_TASK_PROCESS_RATE_MULTIPLIER =
+    ConfigBuilder("spark.speculation.efficiency.processRateMultiplier")
+      .doc("A multiplier that used when evaluating inefficient tasks. The 
higher the multiplier " +
+        "is, the more tasks will be possibly considered as inefficient.")
+      .version("3.4.0")
+      .doubleConf
+      .checkValue(v => v > 0.0 && v <= 1.0, "multiplier must be in (0.0, 1.0]")
+      .createWithDefault(0.75)
+
+  private[spark] val SPECULATION_EFFICIENCY_TASK_DURATION_FACTOR =
+    ConfigBuilder("spark.speculation.efficiency.longRunTaskFactor")
+      .doc(s"A task will be speculated anyway as long as its duration has 
exceeded the value of " +
+        s"multiplying the factor and the time threshold (either be 
${SPECULATION_MULTIPLIER.key} " +
+        s"* successfulTaskDurations.median or 
${SPECULATION_MIN_THRESHOLD.key}) regardless of " +
+        s"it's efficient or not. This avoids missing the tasks when task slow 
isn't due to data" +

Review Comment:
   nit: "...regardless of it's data process rate is good or not.  This avoids 
missing the inefficient tasks when task slow isn't related to..."



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to