Repository: spark
Updated Branches:
  refs/heads/branch-2.1 66a7ca28a -> 6da6a27f6


[SPARK-19707][CORE] Improve the invalid path check for sc.addJar

## What changes were proposed in this pull request?

Currently in Spark there are two issues when we add jars with an invalid path:

* If the jar path is an empty string {--jar ",dummy.jar"}, then Spark will 
resolve it to the current directory path and add it to the classpath / file 
server, which is unwanted. This happens in our programmatic way of submitting 
Spark applications. From my understanding, Spark should defensively filter out 
such empty paths.
* If the jar path is an invalid path (the file doesn't exist), `addJar` doesn't 
check it and will still add it to the file server; the exception is delayed 
until the job runs. Actually this local path could be checked beforehand, with 
no need to wait until the task runs. We have a similar check in `addFile`, but 
lack a similar mechanism in `addJar`.

## How was this patch tested?

Add unit test and local manual verification.

Author: jerryshao <[email protected]>

Closes #17038 from jerryshao/SPARK-19707.

(cherry picked from commit b0a8c16fecd4337f77bfbe4b45884254d7af52bd)
Signed-off-by: Marcelo Vanzin <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6da6a27f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6da6a27f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6da6a27f

Branch: refs/heads/branch-2.1
Commit: 6da6a27f673f6e45fe619e0411fbaaa14ea34bfb
Parents: 66a7ca2
Author: jerryshao <[email protected]>
Authored: Fri Feb 24 09:28:59 2017 -0800
Committer: Marcelo Vanzin <[email protected]>
Committed: Fri Feb 24 09:29:12 2017 -0800

----------------------------------------------------------------------
 .../main/scala/org/apache/spark/SparkContext.scala  | 12 ++++++++++--
 .../main/scala/org/apache/spark/util/Utils.scala    |  2 +-
 .../scala/org/apache/spark/SparkContextSuite.scala  | 16 ++++++++++++++++
 .../scala/org/apache/spark/util/UtilsSuite.scala    |  1 +
 4 files changed, 28 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/6da6a27f/core/src/main/scala/org/apache/spark/SparkContext.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala 
b/core/src/main/scala/org/apache/spark/SparkContext.scala
index 2db48f6..5ae9db7 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -1727,10 +1727,18 @@ class SparkContext(config: SparkConf) extends Logging {
           // A JAR file which exists only on the driver node
           case null | "file" =>
             try {
+              val file = new File(uri.getPath)
+              if (!file.exists()) {
+                throw new FileNotFoundException(s"Jar ${file.getAbsolutePath} 
not found")
+              }
+              if (file.isDirectory) {
+                throw new IllegalArgumentException(
+                  s"Directory ${file.getAbsoluteFile} is not allowed for 
addJar")
+              }
               env.rpcEnv.fileServer.addJar(new File(uri.getPath))
             } catch {
-              case exc: FileNotFoundException =>
-                logError(s"Jar not found at $path")
+              case NonFatal(e) =>
+                logError(s"Failed to add $path to Spark environment", e)
                 null
             }
           // A JAR file which exists locally on every worker node

http://git-wip-us.apache.org/repos/asf/spark/blob/6da6a27f/core/src/main/scala/org/apache/spark/util/Utils.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala 
b/core/src/main/scala/org/apache/spark/util/Utils.scala
index 00b1b54..4cdfb9c 100644
--- a/core/src/main/scala/org/apache/spark/util/Utils.scala
+++ b/core/src/main/scala/org/apache/spark/util/Utils.scala
@@ -2016,7 +2016,7 @@ private[spark] object Utils extends Logging {
     if (paths == null || paths.trim.isEmpty) {
       ""
     } else {
-      paths.split(",").map { p => Utils.resolveURI(p) }.mkString(",")
+      paths.split(",").filter(_.trim.nonEmpty).map { p => Utils.resolveURI(p) 
}.mkString(",")
     }
   }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/6da6a27f/core/src/test/scala/org/apache/spark/SparkContextSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/SparkContextSuite.scala 
b/core/src/test/scala/org/apache/spark/SparkContextSuite.scala
index c451c59..a2d25d2 100644
--- a/core/src/test/scala/org/apache/spark/SparkContextSuite.scala
+++ b/core/src/test/scala/org/apache/spark/SparkContextSuite.scala
@@ -289,6 +289,22 @@ class SparkContextSuite extends SparkFunSuite with 
LocalSparkContext {
     }
   }
 
+  test("add jar with invalid path") {
+    val tmpDir = Utils.createTempDir()
+    val tmpJar = File.createTempFile("test", ".jar", tmpDir)
+
+    sc = new SparkContext(new 
SparkConf().setAppName("test").setMaster("local"))
+    sc.addJar(tmpJar.getAbsolutePath)
+
+    // Invaid jar path will only print the error log, will not add to file 
server.
+    sc.addJar("dummy.jar")
+    sc.addJar("")
+    sc.addJar(tmpDir.getAbsolutePath)
+
+    sc.listJars().size should be (1)
+    sc.listJars().head should include (tmpJar.getName)
+  }
+
   test("Cancelling job group should not cause SparkContext to shutdown 
(SPARK-6414)") {
     try {
       sc = new SparkContext(new 
SparkConf().setAppName("test").setMaster("local"))

http://git-wip-us.apache.org/repos/asf/spark/blob/6da6a27f/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala 
b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
index feacfb7..8706d72 100644
--- a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
@@ -484,6 +484,7 @@ class UtilsSuite extends SparkFunSuite with 
ResetSystemProperties with Logging {
       assertResolves("""hdfs:/jar1,file:/jar2,jar3,C:\pi.py#py.pi,C:\path 
to\jar4""",
         
s"hdfs:/jar1,file:/jar2,file:$cwd/jar3,file:/C:/pi.py#py.pi,file:/C:/path%20to/jar4")
     }
+    assertResolves(",jar1,jar2", s"file:$cwd/jar1,file:$cwd/jar2")
   }
 
   test("nonLocalPaths") {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to