adambalogh closed pull request #22289: [SPARK-25200][YARN] Allow specifying 
HADOOP_CONF_DIR as spark property
URL: https://github.com/apache/spark/pull/22289
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as GitHub does not display the original diff once the pull request is merged):

diff --git 
a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala 
b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
index 0998757715457..77de0314f8f6b 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
@@ -286,9 +286,11 @@ private[deploy] class SparkSubmitArguments(args: 
Seq[String], env: Map[String, S
 
     if (master.startsWith("yarn")) {
       val hasHadoopEnv = env.contains("HADOOP_CONF_DIR") || 
env.contains("YARN_CONF_DIR")
-      if (!hasHadoopEnv && !Utils.isTesting) {
+      val hasHadoopProp = sparkProperties.contains("spark.yarn.conf.dir")
+      if (!hasHadoopEnv && !hasHadoopProp && !Utils.isTesting) {
         error(s"When running with master '$master' " +
-          "either HADOOP_CONF_DIR or YARN_CONF_DIR must be set in the 
environment.")
+          "either HADOOP_CONF_DIR or YARN_CONF_DIR must be set in the 
environment, +" +
+          "or spark.yarn.conf.dir in the spark properties.")
       }
     }
 
diff --git 
a/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java 
b/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java
index ce24400f557cd..ead48d3df1c2c 100644
--- 
a/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java
+++ 
b/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java
@@ -200,6 +200,7 @@ void addOptionString(List<String> cmd, String options) {
 
     addToClassPath(cp, getenv("HADOOP_CONF_DIR"));
     addToClassPath(cp, getenv("YARN_CONF_DIR"));
+    addToClassPath(cp, getEffectiveConfig().get("spark.yarn.conf.dir"));
     addToClassPath(cp, getenv("SPARK_DIST_CLASSPATH"));
     return new ArrayList<>(cp);
   }
diff --git 
a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
 
b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
index 4a85898ef880b..33028d561b131 100644
--- 
a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
+++ 
b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
@@ -694,24 +694,21 @@ private[spark] class Client(
       }
     }
 
-    // SPARK-23630: during testing, Spark scripts filter out hadoop conf dirs 
so that user's
-    // environments do not interfere with tests. This allows a special env 
variable during
-    // tests so that custom conf dirs can be used by unit tests.
-    val confDirs = Seq("HADOOP_CONF_DIR", "YARN_CONF_DIR") ++
-      (if (Utils.isTesting) Seq("SPARK_TEST_HADOOP_CONF_DIR") else Nil)
-
-    confDirs.foreach { envKey =>
-      sys.env.get(envKey).foreach { path =>
-        val dir = new File(path)
-        if (dir.isDirectory()) {
-          val files = dir.listFiles()
-          if (files == null) {
-            logWarning("Failed to list files under directory " + dir)
-          } else {
-            files.foreach { file =>
-              if (file.isFile && !hadoopConfFiles.contains(file.getName())) {
-                hadoopConfFiles(file.getName()) = file
-              }
+    val confDirsEnvKeys = Seq("HADOOP_CONF_DIR", "YARN_CONF_DIR")
+    val confDirProp = sparkConf.getOption("spark.yarn.conf.dir")
+
+    val confDirPaths = (confDirsEnvKeys.map(sys.env.get) :+ 
confDirProp).flatten
+    confDirPaths.foreach { path =>
+      logDebug("Reading config files from " + path)
+      val dir = new File(path)
+      if (dir.isDirectory()) {
+        val files = dir.listFiles()
+        if (files == null) {
+          logWarning("Failed to list files under directory " + dir)
+        } else {
+          files.foreach { file =>
+            if (file.isFile && !hadoopConfFiles.contains(file.getName())) {
+              hadoopConfFiles(file.getName()) = file
             }
           }
         }
diff --git 
a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala
 
b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala
index 58d11e96942e1..e49d7c6001892 100644
--- 
a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala
+++ 
b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala
@@ -131,8 +131,9 @@ class YarnClusterSuite extends BaseYarnClusterSuite {
     val finalState = runSpark(false,
       mainClassName(YarnClusterDriverUseSparkHadoopUtilConf.getClass),
       appArgs = Seq("key=value", "spark.test.key=testvalue", 
result.getAbsolutePath()),
-      extraConf = Map("spark.hadoop.key" -> "value"),
-      extraEnv = Map("SPARK_TEST_HADOOP_CONF_DIR" -> 
customConf.getAbsolutePath()))
+      extraConf = Map(
+        "spark.hadoop.key" -> "value",
+        "spark.yarn.conf.dir" -> customConf.getAbsolutePath()))
     checkResult(finalState, result)
   }
 


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to