yaooqinn commented on a change in pull request #25962: [SPARK-29285][Shuffle]
Temporary shuffle files should be able to handle disk failures
URL: https://github.com/apache/spark/pull/25962#discussion_r339868020
##########
File path: core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala
##########
@@ -117,20 +119,38 @@ private[spark] class DiskBlockManager(conf: SparkConf,
deleteFilesOnStop: Boolea
/** Produces a unique block id and File suitable for storing local
intermediate results. */
def createTempLocalBlock(): (TempLocalBlockId, File) = {
- var blockId = new TempLocalBlockId(UUID.randomUUID())
- while (getFile(blockId).exists()) {
- blockId = new TempLocalBlockId(UUID.randomUUID())
+ var blockId = TempLocalBlockId(UUID.randomUUID())
+ var tempLocalFile = getFile(blockId)
+ var count = 0
+ while (!canCreateFile(tempLocalFile) && count <
Utils.MAX_DIR_CREATION_ATTEMPTS) {
+ blockId = TempLocalBlockId(UUID.randomUUID())
+ tempLocalFile = getFile(blockId)
+ count += 1
}
- (blockId, getFile(blockId))
+ (blockId, tempLocalFile)
}
/** Produces a unique block id and File suitable for storing shuffled
intermediate results. */
def createTempShuffleBlock(): (TempShuffleBlockId, File) = {
- var blockId = new TempShuffleBlockId(UUID.randomUUID())
- while (getFile(blockId).exists()) {
- blockId = new TempShuffleBlockId(UUID.randomUUID())
+ var blockId = TempShuffleBlockId(UUID.randomUUID())
+ var tempShuffleFile = getFile(blockId)
+ var count = 0
+ while (!canCreateFile(tempShuffleFile) && count <
Utils.MAX_DIR_CREATION_ATTEMPTS) {
+ blockId = TempShuffleBlockId(UUID.randomUUID())
+ tempShuffleFile = getFile(blockId)
+ count += 1
+ }
+ (blockId, tempShuffleFile)
+ }
+
+ private def canCreateFile(file: File): Boolean = {
+ try {
+ file.createNewFile()
Review comment:
Yes, either exactly one temp file is created or none at all, both before and
after this change.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]