venkata91 commented on a change in pull request #26339: [SPARK-27194][SPARK-29302][SQL] Fix the issue that for dynamic partition overwrite a task would conflict with its speculative task URL: https://github.com/apache/spark/pull/26339#discussion_r403841559
##########
File path: core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala
##########
@@ -91,6 +91,31 @@ class HadoopMapReduceCommitProtocol(
    */
   private def stagingDir = new Path(path, ".spark-staging-" + jobId)

+  /**
+   * Get staging path for a task with dynamicPartitionOverwrite=true.
+   */
+  private def dynamicStagingTaskPath(dir: Option[String], taskContext: TaskAttemptContext): Path = {
+    assert(dynamicPartitionOverwrite && dir.isDefined)
+    val attemptID = taskContext.getTaskAttemptID.getId
+    new Path(stagingDir, s"${dir.get}-${attemptID}")
+  }
+
+  /**
+   * Tracks the staging task files with dynamicPartitionOverwrite=true.
+   */
+  @transient private var dynamicStagingTaskFiles: mutable.Set[Path] = null
+
+  /**
+   * Get responding partition path for a task with dynamicPartitionOverwrite=true.
+   */
+  private def getDynamicPartitionPath(stagingTaskFile: Path, context: TaskAttemptContext): Path = {
+    assert(dynamicPartitionOverwrite)

Review comment:
Same here, do we need assert?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org