Github user tgravescs commented on a diff in the pull request: https://github.com/apache/spark/pull/21607#discussion_r197315441 --- Diff: core/src/main/scala/org/apache/spark/scheduler/OutputCommitCoordinator.scala --- @@ -97,48 +102,48 @@ private[spark] class OutputCommitCoordinator(conf: SparkConf, isDriver: Boolean) } /** - * Called by the DAGScheduler when a stage starts. + * Called by the DAGScheduler when a stage starts. Initializes the stage's state if it hasn't + * yet been initialized. * * @param stage the stage id. * @param maxPartitionId the maximum partition id that could appear in this stage's tasks (i.e. * the maximum possible value of `context.partitionId`). */ - private[scheduler] def stageStart( - stage: StageId, - maxPartitionId: Int): Unit = { - val arr = new Array[TaskAttemptNumber](maxPartitionId + 1) - java.util.Arrays.fill(arr, NO_AUTHORIZED_COMMITTER) + private[scheduler] def stageStart(stage: Int, maxPartitionId: Int): Unit = synchronized { + val arr = Array.fill[TaskIdentifier](maxPartitionId + 1)(null) synchronized { --- End diff -- we have 2 nested synchronized
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org