Ngone51 commented on a change in pull request #32007:
URL: https://github.com/apache/spark/pull/32007#discussion_r637973198
##########
File path: core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala
##########
@@ -153,6 +204,60 @@ private[spark] class DiskBlockManager(conf: SparkConf, deleteFilesOnStop: Boolea
}
}
+  /**
+   * Get the list of configured local dirs storing merged shuffle blocks created by executors
+   * if push-based shuffle is enabled. Note that the files in this directory will be created
+   * by the external shuffle services. We only create the merge_manager directories and
+   * subdirectories here because currently the shuffle service doesn't have permission to
+   * create directories under application local directories.
+   */
+  private def createLocalDirsForMergedShuffleBlocks(conf: SparkConf): Array[File] = {
+    if (Utils.isPushBasedShuffleEnabled(conf)) {
+      // Will create the merge_manager directory only if it doesn't exist under any local dir.
+      for (rootDir <- configuredLocalDirs) {
+        val mergeDir = new File(rootDir, mergeDirName)
+        if (!mergeDir.exists()) {
+          logDebug(s"Creating $mergeDir as it does not exist")
+          // This executor didn't see merge_manager in the local dir, so it will start creating them.
+          // It's possible that other executors launched at the same time may also reach here,
+          // but we are working on the assumption that executors launched around the same time
+          // will have the same set of application local directories.
+          try {
+            for (dirNum <- 0 until subDirsPerLocalDir) {
+              // Only one container will create this directory. The filesystem will handle any
+              // race conditions.
+              val subDir = new File(mergeDir, "%02x".format(dirNum))
+              Utils.createDirWithCustomizedPermission(subDir, "770")
+            }
+            logInfo(s"Merge directory at $mergeDir")
+          } catch {
+            case e: IOException =>
+              logError(s"Failed to create merge dir in $rootDir. Ignoring this directory.", e)
+          }
+        }
+      }
+    }
+    findActiveMergedShuffleDirs(conf)
+  }
+
+  private def findActiveMergedShuffleDirs(conf: SparkConf): Array[File] = {
Review comment:
This is only called once. Shall we inline it into
`createLocalDirsForMergedShuffleBlocks`?
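
For illustration only, a rough sketch of what the inlined version might look like. The body of `findActiveMergedShuffleDirs` is outside this hunk, so the tail below is an assumed placeholder that simply collects the merge directories that exist after the creation loop:

```scala
// Sketch, not the actual patch: the creation loop is kept as-is, and the former
// findActiveMergedShuffleDirs body is assumed to just scan for the merge_manager
// directories that now exist under the configured local dirs.
private def createLocalDirsForMergedShuffleBlocks(conf: SparkConf): Array[File] = {
  if (Utils.isPushBasedShuffleEnabled(conf)) {
    // ... merge_manager directory creation loop as above ...
  }
  // Inlined tail (assumed shape): return the merge dirs that currently exist.
  configuredLocalDirs
    .map(rootDir => new File(rootDir, mergeDirName))
    .filter(_.exists())
}
```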
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]