AngersZhuuuu commented on a change in pull request #29966:
URL: https://github.com/apache/spark/pull/29966#discussion_r533898392



##########
File path: core/src/main/scala/org/apache/spark/SparkContext.scala
##########
@@ -1919,18 +1919,26 @@ class SparkContext(config: SparkConf) extends Logging {
           // A JAR file which exists only on the driver node
           case "file" => addLocalJarFile(new File(uri.getPath))
           // A JAR file which exists locally on every worker node
-          case "local" => "file:" + uri.getPath
+          case "local" => Seq("file:" + uri.getPath)
+          case "ivy" =>
+            // Since `new Path(path).toUri` will lose query information,
+            // so here we use `URI.create(path)`
+            DependencyUtils.resolveMavenDependencies(URI.create(path))
           case _ => checkRemoteJarFile(path)
         }
       }
-      if (key != null) {
+      if (keys.nonEmpty) {
         val timestamp = if (addedOnSubmit) startTime else 
System.currentTimeMillis
-        if (addedJars.putIfAbsent(key, timestamp).isEmpty) {
-          logInfo(s"Added JAR $path at $key with timestamp $timestamp")
+        val (added, existed) = keys.partition(addedJars.putIfAbsent(_, 
timestamp).isEmpty)
+        if (added.nonEmpty) {
+          logInfo(s"Added jar or dependency jars of ivy URI with $path" +

Review comment:
       Yes, done.

##########
File path: core/src/main/scala/org/apache/spark/SparkContext.scala
##########
@@ -1919,18 +1919,26 @@ class SparkContext(config: SparkConf) extends Logging {
           // A JAR file which exists only on the driver node
           case "file" => addLocalJarFile(new File(uri.getPath))
           // A JAR file which exists locally on every worker node
-          case "local" => "file:" + uri.getPath
+          case "local" => Seq("file:" + uri.getPath)
+          case "ivy" =>
+            // Since `new Path(path).toUri` will lose query information,
+            // so here we use `URI.create(path)`
+            DependencyUtils.resolveMavenDependencies(URI.create(path))
           case _ => checkRemoteJarFile(path)
         }
       }
-      if (key != null) {
+      if (keys.nonEmpty) {
         val timestamp = if (addedOnSubmit) startTime else 
System.currentTimeMillis
-        if (addedJars.putIfAbsent(key, timestamp).isEmpty) {
-          logInfo(s"Added JAR $path at $key with timestamp $timestamp")
+        val (added, existed) = keys.partition(addedJars.putIfAbsent(_, 
timestamp).isEmpty)
+        if (added.nonEmpty) {
+          logInfo(s"Added jar or dependency jars of ivy URI with $path" +
+            s" at ${added.mkString(",")} with timestamp $timestamp")
           postEnvironmentUpdate()
-        } else {
-          logWarning(s"The jar $path has been added already. Overwriting of 
added jars " +
-            "is not supported in the current version.")
+        }
+        if (existed.nonEmpty) {

Review comment:
       > Could you add tests to check if this warning message is shown only once by using `LogAppender`?
   
   Sure

##########
File path: core/src/test/scala/org/apache/spark/SparkContextSuite.scala
##########
@@ -955,6 +978,121 @@ class SparkContextSuite extends SparkFunSuite with 
LocalSparkContext with Eventu
         .set(EXECUTOR_ALLOW_SPARK_CONTEXT, true)).stop()
     }
   }
+
+  test("SPARK-33084: Add jar support ivy url -- default transitive = false") {
+    sc = new SparkContext(new 
SparkConf().setAppName("test").setMaster("local-cluster[3, 1, 1024]"))
+    sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0")
+    
assert(sc.listJars().exists(_.contains("org.apache.hive_hive-storage-api-2.7.0.jar")))
+    
assert(!sc.listJars().exists(_.contains("commons-lang_commons-lang-2.6.jar")))
+
+    sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0?transitive=true")
+    
assert(sc.listJars().exists(_.contains("commons-lang_commons-lang-2.6.jar")))
+  }
+
+  test("SPARK-33084: Add jar support ivy url -- invalid transitive use default 
false") {
+    sc = new SparkContext(new 
SparkConf().setAppName("test").setMaster("local-cluster[3, 1, 1024]"))
+    sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0?transitive=foo")
+    
assert(sc.listJars().exists(_.contains("org.apache.hive_hive-storage-api-2.7.0.jar")))
+    assert(!sc.listJars().exists(_.contains("org.slf4j_slf4j-api-1.7.10.jar")))
+    
assert(!sc.listJars().exists(_.contains("commons-lang_commons-lang-2.6.jar")))
+  }
+
+  test("SPARK-33084: Add jar support ivy url -- transitive=true will download 
dependency jars") {
+    sc = new SparkContext(new 
SparkConf().setAppName("test").setMaster("local-cluster[3, 1, 1024]"))
+    sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0?transitive=true")
+    
assert(sc.listJars().exists(_.contains("org.apache.hive_hive-storage-api-2.7.0.jar")))
+    assert(sc.listJars().exists(_.contains("org.slf4j_slf4j-api-1.7.10.jar")))
+    
assert(sc.listJars().exists(_.contains("commons-lang_commons-lang-2.6.jar")))
+  }
+
+  test("SPARK-33084: Add jar support ivy url -- test exclude param when 
transitive=true") {
+    sc = new SparkContext(new 
SparkConf().setAppName("test").setMaster("local-cluster[3, 1, 1024]"))
+    sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0" +
+      "?exclude=commons-lang:commons-lang&transitive=true")
+    
assert(sc.listJars().exists(_.contains("org.apache.hive_hive-storage-api-2.7.0.jar")))
+    assert(sc.listJars().exists(_.contains("org.slf4j_slf4j-api-1.7.10.jar")))
+    
assert(!sc.listJars().exists(_.contains("commons-lang_commons-lang-2.6.jar")))
+  }
+
+  test("SPARK-33084: Add jar support ivy url -- test different version") {
+    sc = new SparkContext(new 
SparkConf().setAppName("test").setMaster("local-cluster[3, 1, 1024]"))
+    sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0")
+    
assert(sc.listJars().exists(_.contains("org.apache.hive_hive-storage-api-2.7.0.jar")))
+    sc.addJar("ivy://org.apache.hive:hive-storage-api:2.6.0")
+    
assert(sc.listJars().exists(_.contains("org.apache.hive_hive-storage-api-2.6.0.jar")))
+  }
+
+  test("SPARK-33084: Add jar support ivy url -- test invalid param") {
+    sc = new SparkContext(new 
SparkConf().setAppName("test").setMaster("local-cluster[3, 1, 1024]"))
+    sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0?invalidParam=foo")

Review comment:
       > https://github.com/apache/spark/pull/29966/files#r533579040
   > If we show a warning message for this case, we need tests for that.
   
   Done




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org

Reply via email to