[ https://issues.apache.org/jira/browse/SPARK-25200?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16716848#comment-16716848 ]
ASF GitHub Bot commented on SPARK-25200: ---------------------------------------- adambalogh closed pull request #22289: [SPARK-25200][YARN] Allow specifying HADOOP_CONF_DIR as spark property URL: https://github.com/apache/spark/pull/22289 This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala index 0998757715457..77de0314f8f6b 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala @@ -286,9 +286,11 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S if (master.startsWith("yarn")) { val hasHadoopEnv = env.contains("HADOOP_CONF_DIR") || env.contains("YARN_CONF_DIR") - if (!hasHadoopEnv && !Utils.isTesting) { + val hasHadoopProp = sparkProperties.contains("spark.yarn.conf.dir") + if (!hasHadoopEnv && !hasHadoopProp && !Utils.isTesting) { error(s"When running with master '$master' " + - "either HADOOP_CONF_DIR or YARN_CONF_DIR must be set in the environment.") + "either HADOOP_CONF_DIR or YARN_CONF_DIR must be set in the environment, +" + + "or spark.yarn.conf.dir in the spark properties.") } } diff --git a/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java b/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java index ce24400f557cd..ead48d3df1c2c 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java +++ b/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java @@ -200,6 +200,7 @@ void addOptionString(List<String> cmd, String options) { addToClassPath(cp, 
getenv("HADOOP_CONF_DIR")); addToClassPath(cp, getenv("YARN_CONF_DIR")); + addToClassPath(cp, getEffectiveConfig().get("spark.yarn.conf.dir")); addToClassPath(cp, getenv("SPARK_DIST_CLASSPATH")); return new ArrayList<>(cp); } diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index 4a85898ef880b..33028d561b131 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -694,24 +694,21 @@ private[spark] class Client( } } - // SPARK-23630: during testing, Spark scripts filter out hadoop conf dirs so that user's - // environments do not interfere with tests. This allows a special env variable during - // tests so that custom conf dirs can be used by unit tests. - val confDirs = Seq("HADOOP_CONF_DIR", "YARN_CONF_DIR") ++ - (if (Utils.isTesting) Seq("SPARK_TEST_HADOOP_CONF_DIR") else Nil) - - confDirs.foreach { envKey => - sys.env.get(envKey).foreach { path => - val dir = new File(path) - if (dir.isDirectory()) { - val files = dir.listFiles() - if (files == null) { - logWarning("Failed to list files under directory " + dir) - } else { - files.foreach { file => - if (file.isFile && !hadoopConfFiles.contains(file.getName())) { - hadoopConfFiles(file.getName()) = file - } + val confDirsEnvKeys = Seq("HADOOP_CONF_DIR", "YARN_CONF_DIR") + val confDirProp = sparkConf.getOption("spark.yarn.conf.dir") + + val confDirPaths = (confDirsEnvKeys.map(sys.env.get) :+ confDirProp).flatten + confDirPaths.foreach { path => + logDebug("Reading config files from " + path) + val dir = new File(path) + if (dir.isDirectory()) { + val files = dir.listFiles() + if (files == null) { + logWarning("Failed to list files under directory " + dir) + } else { + files.foreach { file => + if (file.isFile && !hadoopConfFiles.contains(file.getName())) { + 
hadoopConfFiles(file.getName()) = file } } } diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala index 58d11e96942e1..e49d7c6001892 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala @@ -131,8 +131,9 @@ class YarnClusterSuite extends BaseYarnClusterSuite { val finalState = runSpark(false, mainClassName(YarnClusterDriverUseSparkHadoopUtilConf.getClass), appArgs = Seq("key=value", "spark.test.key=testvalue", result.getAbsolutePath()), - extraConf = Map("spark.hadoop.key" -> "value"), - extraEnv = Map("SPARK_TEST_HADOOP_CONF_DIR" -> customConf.getAbsolutePath())) + extraConf = Map( + "spark.hadoop.key" -> "value", + "spark.yarn.conf.dir" -> customConf.getAbsolutePath())) checkResult(finalState, result) } ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org > Allow setting HADOOP_CONF_DIR as a spark property > ------------------------------------------------- > > Key: SPARK-25200 > URL: https://issues.apache.org/jira/browse/SPARK-25200 > Project: Spark > Issue Type: Bug > Components: Spark Core > Affects Versions: 2.3.1 > Reporter: Adam Balogh > Priority: Major > > When submitting applications to Yarn in cluster mode, using the > InProcessLauncher, spark finds the cluster's configuration files based on the > HADOOP_CONF_DIR environment variable. This does not make it possible to > submit to more than one Yarn cluster concurrently using the > InProcessLauncher. 
> I think we should make it possible to define HADOOP_CONF_DIR as a spark > property, so it can be different for each spark submission. -- This message was sent by Atlassian JIRA (v7.6.3#76005) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org