zhouyejoe commented on a change in pull request #32007:
URL: https://github.com/apache/spark/pull/32007#discussion_r633258762



##########
File path: core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala
##########
@@ -153,6 +189,59 @@ private[spark] class DiskBlockManager(conf: SparkConf, 
deleteFilesOnStop: Boolea
     }
   }
 
+  /**
+   * Get the list of configured local dirs storing merged shuffle blocks 
created by executors
+   * if push based shuffle is enabled. Note that the files in this directory 
will be created
+   * by the external shuffle services. We only create the merge_manager 
directories and
+   * subdirectories here because currently the shuffle service doesn't have 
permission to
+   * create directories under application local directories.
+   */
+  private def createLocalDirsForMergedShuffleBlocks(conf: SparkConf): 
Option[Array[File]] = {
+    if (Utils.isPushBasedShuffleEnabled(conf)) {
+      // Will create the merge_manager directory only if it doesn't exist 
under any local dir.
+      val localDirs = Utils.getConfiguredLocalDirs(conf)
+      var mergeDirCreated = false;
+      for (rootDir <- localDirs) {
+        val mergeDir = new File(rootDir, MERGE_MANAGER_DIR)
+        if (mergeDir.exists()) {
+          logDebug(s"Not creating $mergeDir as it already exists")
+          mergeDirCreated = true
+        }
+      }
+      if (!mergeDirCreated) {
+        // This executor didn't see any merge_manager directories, it will 
start creating them.
+        // It's possible that the other executors launched at the same time 
may also reach here but
+        // we are working on the assumption that the executors launched around 
the same time will
+        // have the same set of application local directories.
+        localDirs.foreach { rootDir =>
+          try {
+            val mergeDir = new File(rootDir, MERGE_MANAGER_DIR)
+            // Only one container will create this directory. The filesystem 
will handle any race
+            // conditions.
+            if (!mergeDir.exists()) {
+              Utils.createDirWith770(mergeDir)

Review comment:
       @otterc Added -p into the createDirWith770 method.

##########
File path: core/src/main/scala/org/apache/spark/storage/BlockId.scala
##########
@@ -87,6 +87,29 @@ case class ShufflePushBlockId(shuffleId: Int, mapIndex: Int, 
reduceId: Int) exte
   override def name: String = "shufflePush_" + shuffleId + "_" + mapIndex + 
"_" + reduceId
 }
 
+@DeveloperApi
+case class ShuffleMergedBlockId(appId: String, shuffleId: Int, reduceId: Int) 
extends BlockId {
+  override def name: String = "mergedShuffle_" + appId + "_" + shuffleId + "_" 
+ reduceId + ".data"
+}
+
+@DeveloperApi
+case class ShuffleMergedIndexBlockId(
+  appId: String,
+  shuffleId: Int,
+  reduceId: Int) extends BlockId {
+  override def name: String =
+    "mergedShuffle_" + appId + "_" + shuffleId + "_" + reduceId + ".index"

Review comment:
       Updated accordingly. Please help review. @mridulm @Ngone51 @otterc .
   




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to