This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new c04e0de0733 [SPARK-42808][CORE] Avoid getting availableProcessors every time in `MapOutputTrackerMaster#getStatistics` c04e0de0733 is described below commit c04e0de073354458f89d30733134a004fe2a25bd Author: sychen <syc...@ctrip.com> AuthorDate: Tue Mar 21 09:57:06 2023 -0500 [SPARK-42808][CORE] Avoid getting availableProcessors every time in `MapOutputTrackerMaster#getStatistics` ### What changes were proposed in this pull request? The return value of `Runtime.getRuntime.availableProcessors` is generally a fixed value. It is not necessary to obtain it every time `getStatistics` is called; caching it once in a field avoids a repeated native method call. ### Why are the changes needed? ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing UT Closes #40440 from cxzl25/SPARK-42808. Authored-by: sychen <syc...@ctrip.com> Signed-off-by: Sean Owen <sro...@gmail.com> --- core/src/main/scala/org/apache/spark/MapOutputTracker.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala index 5772285a63d..5ad62159d24 100644 --- a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala +++ b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala @@ -697,6 +697,8 @@ private[spark] class MapOutputTrackerMaster( pool } + private val availableProcessors = Runtime.getRuntime.availableProcessors() + // Make sure that we aren't going to exceed the max RPC message size by making sure // we use broadcast to send large map output statuses. 
if (minSizeForBroadcast > maxRpcMessageSize) { @@ -966,7 +968,7 @@ private[spark] class MapOutputTrackerMaster( val parallelAggThreshold = conf.get( SHUFFLE_MAP_OUTPUT_PARALLEL_AGGREGATION_THRESHOLD) val parallelism = math.min( - Runtime.getRuntime.availableProcessors(), + availableProcessors, statuses.length.toLong * totalSizes.length / parallelAggThreshold + 1).toInt if (parallelism <= 1) { statuses.filter(_ != null).foreach { s => --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org