spark git commit: [SPARK-17512][CORE] Avoid formatting to python path for yarn and mesos cluster mode
Repository: spark Updated Branches: refs/heads/branch-2.0 cd0bd89d7 -> 59e6ab11a [SPARK-17512][CORE] Avoid formatting to python path for yarn and mesos cluster mode ## What changes were proposed in this pull request? Yarn and mesos cluster mode support remote python path (HDFS/S3 scheme) by their own mechanism, it is not necessary to check and format the python when running on these modes. This is a potential regression compared to 1.6, so here propose to fix it. ## How was this patch tested? Unit test to verify SparkSubmit arguments, also with local cluster verification. Because of lack of `MiniDFSCluster` support in Spark unit test, there's no integration test added. Author: jerryshao. Closes #15137 from jerryshao/SPARK-17512. (cherry picked from commit 8c3ee2bc42e6320b9341cebdba51a00162c897ea) Signed-off-by: Andrew Or Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/59e6ab11 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/59e6ab11 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/59e6ab11 Branch: refs/heads/branch-2.0 Commit: 59e6ab11a9e27d30ae3477fdc03337ff5f8ab4ec Parents: cd0bd89 Author: jerryshao Authored: Wed Sep 21 17:57:21 2016 -0400 Committer: Andrew Or Committed: Wed Sep 21 17:57:33 2016 -0400 -- .../org/apache/spark/deploy/SparkSubmit.scala| 13 ++--- .../apache/spark/deploy/SparkSubmitSuite.scala | 19 +++ 2 files changed, 29 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/59e6ab11/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 7b6d5a3..8061165 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -311,7 +311,7 @@ object SparkSubmit { // In Mesos cluster mode, non-local python files are 
automatically downloaded by Mesos. if (args.isPython && !isYarnCluster && !isMesosCluster) { if (Utils.nonLocalPaths(args.primaryResource).nonEmpty) { -printErrorAndExit(s"Only local python files are supported: $args.primaryResource") +printErrorAndExit(s"Only local python files are supported: ${args.primaryResource}") } val nonLocalPyFiles = Utils.nonLocalPaths(args.pyFiles).mkString(",") if (nonLocalPyFiles.nonEmpty) { @@ -322,7 +322,7 @@ object SparkSubmit { // Require all R files to be local if (args.isR && !isYarnCluster) { if (Utils.nonLocalPaths(args.primaryResource).nonEmpty) { -printErrorAndExit(s"Only local R files are supported: $args.primaryResource") +printErrorAndExit(s"Only local R files are supported: ${args.primaryResource}") } } @@ -633,7 +633,14 @@ object SparkSubmit { // explicitly sets `spark.submit.pyFiles` in his/her default properties file. sysProps.get("spark.submit.pyFiles").foreach { pyFiles => val resolvedPyFiles = Utils.resolveURIs(pyFiles) - val formattedPyFiles = PythonRunner.formatPaths(resolvedPyFiles).mkString(",") + val formattedPyFiles = if (!isYarnCluster && !isMesosCluster) { +PythonRunner.formatPaths(resolvedPyFiles).mkString(",") + } else { +// Ignoring formatting python path in yarn and mesos cluster mode, these two modes +// support dealing with remote python files, they could distribute and add python files +// locally. 
+resolvedPyFiles + } sysProps("spark.submit.pyFiles") = formattedPyFiles } http://git-wip-us.apache.org/repos/asf/spark/blob/59e6ab11/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala -- diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala index b2bc886..54693c1 100644 --- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala @@ -577,6 +577,25 @@ class SparkSubmitSuite val sysProps3 = SparkSubmit.prepareSubmitEnvironment(appArgs3)._3 sysProps3("spark.submit.pyFiles") should be( PythonRunner.formatPaths(Utils.resolveURIs(pyFiles)).mkString(",")) + +// Test remote python files +val f4 = File.createTempFile("test-submit-remote-python-files", "", tmpDir) +val writer4 = new PrintWriter(f4) +val remotePyFiles =
spark git commit: [SPARK-17512][CORE] Avoid formatting to python path for yarn and mesos cluster mode
Repository: spark Updated Branches: refs/heads/master 9fcf1c51d -> 8c3ee2bc4 [SPARK-17512][CORE] Avoid formatting to python path for yarn and mesos cluster mode ## What changes were proposed in this pull request? Yarn and mesos cluster mode support remote python path (HDFS/S3 scheme) by their own mechanism, it is not necessary to check and format the python when running on these modes. This is a potential regression compared to 1.6, so here propose to fix it. ## How was this patch tested? Unit test to verify SparkSubmit arguments, also with local cluster verification. Because of lack of `MiniDFSCluster` support in Spark unit test, there's no integration test added. Author: jerryshao. Closes #15137 from jerryshao/SPARK-17512. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8c3ee2bc Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8c3ee2bc Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8c3ee2bc Branch: refs/heads/master Commit: 8c3ee2bc42e6320b9341cebdba51a00162c897ea Parents: 9fcf1c5 Author: jerryshao Authored: Wed Sep 21 17:57:21 2016 -0400 Committer: Andrew Or Committed: Wed Sep 21 17:57:21 2016 -0400 -- .../org/apache/spark/deploy/SparkSubmit.scala| 13 ++--- .../apache/spark/deploy/SparkSubmitSuite.scala | 19 +++ 2 files changed, 29 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/8c3ee2bc/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 7b6d5a3..8061165 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -311,7 +311,7 @@ object SparkSubmit { // In Mesos cluster mode, non-local python files are automatically downloaded by Mesos. 
if (args.isPython && !isYarnCluster && !isMesosCluster) { if (Utils.nonLocalPaths(args.primaryResource).nonEmpty) { -printErrorAndExit(s"Only local python files are supported: $args.primaryResource") +printErrorAndExit(s"Only local python files are supported: ${args.primaryResource}") } val nonLocalPyFiles = Utils.nonLocalPaths(args.pyFiles).mkString(",") if (nonLocalPyFiles.nonEmpty) { @@ -322,7 +322,7 @@ object SparkSubmit { // Require all R files to be local if (args.isR && !isYarnCluster) { if (Utils.nonLocalPaths(args.primaryResource).nonEmpty) { -printErrorAndExit(s"Only local R files are supported: $args.primaryResource") +printErrorAndExit(s"Only local R files are supported: ${args.primaryResource}") } } @@ -633,7 +633,14 @@ object SparkSubmit { // explicitly sets `spark.submit.pyFiles` in his/her default properties file. sysProps.get("spark.submit.pyFiles").foreach { pyFiles => val resolvedPyFiles = Utils.resolveURIs(pyFiles) - val formattedPyFiles = PythonRunner.formatPaths(resolvedPyFiles).mkString(",") + val formattedPyFiles = if (!isYarnCluster && !isMesosCluster) { +PythonRunner.formatPaths(resolvedPyFiles).mkString(",") + } else { +// Ignoring formatting python path in yarn and mesos cluster mode, these two modes +// support dealing with remote python files, they could distribute and add python files +// locally. 
+resolvedPyFiles + } sysProps("spark.submit.pyFiles") = formattedPyFiles } http://git-wip-us.apache.org/repos/asf/spark/blob/8c3ee2bc/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala -- diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala index 961ece3..31c8fb2 100644 --- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala @@ -582,6 +582,25 @@ class SparkSubmitSuite val sysProps3 = SparkSubmit.prepareSubmitEnvironment(appArgs3)._3 sysProps3("spark.submit.pyFiles") should be( PythonRunner.formatPaths(Utils.resolveURIs(pyFiles)).mkString(",")) + +// Test remote python files +val f4 = File.createTempFile("test-submit-remote-python-files", "", tmpDir) +val writer4 = new PrintWriter(f4) +val remotePyFiles = "hdfs:///tmp/file1.py,hdfs:///tmp/file2.py" +writer4.println("spark.submit.pyFiles " + remotePyFiles) +writer4.close() +val clArgs4 = Seq( +