This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new ed036a9d0aa [SPARK-44658][CORE] `ShuffleStatus.getMapStatus` should return `None` instead of `Some(null)` ed036a9d0aa is described below commit ed036a9d0aab2d75b5c0db5caebfc158ce22ec15 Author: Dongjoon Hyun <dh...@apple.com> AuthorDate: Thu Aug 3 14:18:16 2023 -0700 [SPARK-44658][CORE] `ShuffleStatus.getMapStatus` should return `None` instead of `Some(null)` ### What changes were proposed in this pull request? This PR is for `master` and `branch-3.5` and aims to fix a regression due to SPARK-43043 which landed at Apache Spark 3.4.1 and reverted via SPARK-44630. This PR makes `ShuffleStatus.getMapStatus` return `None` instead of `Some(null)`. ### Why are the changes needed? `None` is better because `Some(null)` is unsafe because it causes NPE in some cases. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs with the newly added test case. Closes #42323 from dongjoon-hyun/SPARK-44658. Authored-by: Dongjoon Hyun <dh...@apple.com> Signed-off-by: Dongjoon Hyun <dongj...@apache.org> --- core/src/main/scala/org/apache/spark/MapOutputTracker.scala | 5 ++++- core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala | 9 +++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala index 47ac3df4cc6..3495536a350 100644 --- a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala +++ b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala @@ -171,7 +171,10 @@ private class ShuffleStatus( * Get the map output that corresponding to a given mapId. */ def getMapStatus(mapId: Long): Option[MapStatus] = withReadLock { - mapIdToMapIndex.get(mapId).map(mapStatuses(_)) + mapIdToMapIndex.get(mapId).map(mapStatuses(_)) match { + case Some(null) => None + case m => m + } } /** diff --git a/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala b/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala index 7ac3d0092c8..7ee36137e27 100644 --- a/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala +++ b/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala @@ -1083,4 +1083,13 @@ class MapOutputTrackerSuite extends SparkFunSuite with LocalSparkContext { rpcEnv.shutdown() } } + + test("SPARK-44658: ShuffleStatus.getMapStatus should return None") { + val bmID = BlockManagerId("a", "hostA", 1000) + val mapStatus = MapStatus(bmID, Array(1000L, 10000L), mapTaskId = 0) + val shuffleStatus = new ShuffleStatus(1000) + shuffleStatus.addMapOutput(mapIndex = 1, mapStatus) + shuffleStatus.removeMapOutput(mapIndex = 1, bmID) + assert(shuffleStatus.getMapStatus(0).isEmpty) + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org