[ https://issues.apache.org/jira/browse/SPARK-25200?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16716848#comment-16716848 ]
ASF GitHub Bot commented on SPARK-25200: ---------------------------------------- adambalogh closed pull request #22289: [SPARK-25200][YARN] Allow specifying HADOOP_CONF_DIR as spark property URL: https://github.com/apache/spark/pull/22289 This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala index 0998757715457..77de0314f8f6b 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala @@ -286,9 +286,11 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S if (master.startsWith("yarn")) { val hasHadoopEnv = env.contains("HADOOP_CONF_DIR") || env.contains("YARN_CONF_DIR") - if (!hasHadoopEnv && !Utils.isTesting) { + val hasHadoopProp = sparkProperties.contains("spark.yarn.conf.dir") + if (!hasHadoopEnv && !hasHadoopProp && !Utils.isTesting) { error(s"When running with master '$master' " + - "either HADOOP_CONF_DIR or YARN_CONF_DIR must be set in the environment.") + "either HADOOP_CONF_DIR or YARN_CONF_DIR must be set in the environment, +" + + "or spark.yarn.conf.dir in the spark properties.") } } diff --git a/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java b/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java index ce24400f557cd..ead48d3df1c2c 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java +++ b/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java @@ -200,6 +200,7 @@ void addOptionString(List<String> cmd, String options) { addToClassPath(cp, 
getenv("HADOOP_CONF_DIR")); addToClassPath(cp, getenv("YARN_CONF_DIR")); + addToClassPath(cp, getEffectiveConfig().get("spark.yarn.conf.dir")); addToClassPath(cp, getenv("SPARK_DIST_CLASSPATH")); return new ArrayList<>(cp); } diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index 4a85898ef880b..33028d561b131 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -694,24 +694,21 @@ private[spark] class Client( } } - // SPARK-23630: during testing, Spark scripts filter out hadoop conf dirs so that user's - // environments do not interfere with tests. This allows a special env variable during - // tests so that custom conf dirs can be used by unit tests. - val confDirs = Seq("HADOOP_CONF_DIR", "YARN_CONF_DIR") ++ - (if (Utils.isTesting) Seq("SPARK_TEST_HADOOP_CONF_DIR") else Nil) - - confDirs.foreach { envKey => - sys.env.get(envKey).foreach { path => - val dir = new File(path) - if (dir.isDirectory()) { - val files = dir.listFiles() - if (files == null) { - logWarning("Failed to list files under directory " + dir) - } else { - files.foreach { file => - if (file.isFile && !hadoopConfFiles.contains(file.getName())) { - hadoopConfFiles(file.getName()) = file - } + val confDirsEnvKeys = Seq("HADOOP_CONF_DIR", "YARN_CONF_DIR") + val confDirProp = sparkConf.getOption("spark.yarn.conf.dir") + + val confDirPaths = (confDirsEnvKeys.map(sys.env.get) :+ confDirProp).flatten + confDirPaths.foreach { path => + logDebug("Reading config files from " + path) + val dir = new File(path) + if (dir.isDirectory()) { + val files = dir.listFiles() + if (files == null) { + logWarning("Failed to list files under directory " + dir) + } else { + files.foreach { file => + if (file.isFile && !hadoopConfFiles.contains(file.getName())) { + 
hadoopConfFiles(file.getName()) = file } } } diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala index 58d11e96942e1..e49d7c6001892 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala @@ -131,8 +131,9 @@ class YarnClusterSuite extends BaseYarnClusterSuite { val finalState = runSpark(false, mainClassName(YarnClusterDriverUseSparkHadoopUtilConf.getClass), appArgs = Seq("key=value", "spark.test.key=testvalue", result.getAbsolutePath()), - extraConf = Map("spark.hadoop.key" -> "value"), - extraEnv = Map("SPARK_TEST_HADOOP_CONF_DIR" -> customConf.getAbsolutePath())) + extraConf = Map( + "spark.hadoop.key" -> "value", + "spark.yarn.conf.dir" -> customConf.getAbsolutePath())) checkResult(finalState, result) } ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org > Allow setting HADOOP_CONF_DIR as a spark property > ------------------------------------------------- > > Key: SPARK-25200 > URL: https://issues.apache.org/jira/browse/SPARK-25200 > Project: Spark > Issue Type: Bug > Components: Spark Core > Affects Versions: 2.3.1 > Reporter: Adam Balogh > Priority: Major > > When submitting applications to Yarn in cluster mode, using the > InProcessLauncher, spark finds the cluster's configuration files based on the > HADOOP_CONF_DIR environment variable. This does not make it possible to > submit to more than one Yarn cluster concurrently using the > InProcessLauncher. 
> I think we should make it possible to define HADOOP_CONF_DIR as a spark > property, so it can be different for each spark submission. -- This message was sent by Atlassian JIRA (v7.6.3#76005) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org