squito commented on a change in pull request #23393: [SPARK-26288][CORE]add 
initRegisteredExecutorsDB
URL: https://github.com/apache/spark/pull/23393#discussion_r246060339
 
 

 ##########
 File path: 
core/src/main/scala/org/apache/spark/deploy/ExternalShuffleService.scala
 ##########
 @@ -56,11 +60,55 @@ class ExternalShuffleService(sparkConf: SparkConf, 
securityManager: SecurityMana
 
   private var server: TransportServer = _
 
+  private final val  MAX_DIR_CREATION_ATTEMPTS = 10
+
   private val shuffleServiceSource = new ExternalShuffleServiceSource
 
+  protected def createDirectory(root: String, name: String): File = {
+    var attempts = 0
+    val maxAttempts = MAX_DIR_CREATION_ATTEMPTS
+    var dir: File = null
+    while (dir == null) {
+      attempts += 1
+      if (attempts > maxAttempts) {
+        throw new IOException("Failed to create a temp directory (under " + 
root + ") after " +
+          maxAttempts + " attempts!")
+      }
+      try {
+        dir = new File(root, "registeredExecutors")
+        if (!dir.exists() && !dir.mkdirs()) {
+          dir = null
+        }
+      } catch { case e: SecurityException => dir = null; }
+    }
+    logInfo(s"registeredExecutorsDb path is ${dir.getAbsolutePath}")
+    new File(dir.getAbsolutePath, name)
+  }
+
+  protected def initRegisteredExecutorsDB(dbName: String): File = {
+    val localDirs = sparkConf.get("spark.local.dir", "").split(",")
+    if (localDirs.length >= 1 && !"".equals(localDirs(0))) {
+      createDirectory(localDirs(0), dbName)
 
 Review comment:
   Yes, I think WorkDirCleanup may be just what we need to ensure things get
cleaned up — good idea.
   
   I understand wanting to use a consistent directory, but like I said I'm 
worried about restarts after configuration changes (maybe not a concern in a 
standalone mode?  does it always require a total restart?)  You could do 
something like what was done in the original patch for yarn, to check all the 
dirs, but fall back to dir[0] (that code has since changed to take advantage of 
other yarn features for recovery):
   
   
https://github.com/apache/spark/blob/708036c1de52d674ceff30ac465e1dcedeb8dde8/network/yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java#L192-L200

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to