AngersZhuuuu commented on a change in pull request #29966:
URL: https://github.com/apache/spark/pull/29966#discussion_r533898392
##########
File path: core/src/main/scala/org/apache/spark/SparkContext.scala
##########
@@ -1919,18 +1919,26 @@ class SparkContext(config: SparkConf) extends Logging {
// A JAR file which exists only on the driver node
case "file" => addLocalJarFile(new File(uri.getPath))
// A JAR file which exists locally on every worker node
- case "local" => "file:" + uri.getPath
+ case "local" => Seq("file:" + uri.getPath)
+ case "ivy" =>
+ // Since `new Path(path).toUri` will lose query information,
+ // so here we use `URI.create(path)`
+ DependencyUtils.resolveMavenDependencies(URI.create(path))
case _ => checkRemoteJarFile(path)
}
}
- if (key != null) {
+ if (keys.nonEmpty) {
val timestamp = if (addedOnSubmit) startTime else System.currentTimeMillis
- if (addedJars.putIfAbsent(key, timestamp).isEmpty) {
- logInfo(s"Added JAR $path at $key with timestamp $timestamp")
+ val (added, existed) = keys.partition(addedJars.putIfAbsent(_, timestamp).isEmpty)
+ if (added.nonEmpty) {
+ logInfo(s"Added jar or dependency jars of ivy URI with $path" +
Review comment:
Yes, done.
##########
File path: core/src/main/scala/org/apache/spark/SparkContext.scala
##########
@@ -1919,18 +1919,26 @@ class SparkContext(config: SparkConf) extends Logging {
// A JAR file which exists only on the driver node
case "file" => addLocalJarFile(new File(uri.getPath))
// A JAR file which exists locally on every worker node
- case "local" => "file:" + uri.getPath
+ case "local" => Seq("file:" + uri.getPath)
+ case "ivy" =>
+ // Since `new Path(path).toUri` will lose query information,
+ // so here we use `URI.create(path)`
+ DependencyUtils.resolveMavenDependencies(URI.create(path))
case _ => checkRemoteJarFile(path)
}
}
- if (key != null) {
+ if (keys.nonEmpty) {
val timestamp = if (addedOnSubmit) startTime else System.currentTimeMillis
- if (addedJars.putIfAbsent(key, timestamp).isEmpty) {
- logInfo(s"Added JAR $path at $key with timestamp $timestamp")
+ val (added, existed) = keys.partition(addedJars.putIfAbsent(_, timestamp).isEmpty)
+ if (added.nonEmpty) {
+ logInfo(s"Added jar or dependency jars of ivy URI with $path" +
+ s" at ${added.mkString(",")} with timestamp $timestamp")
postEnvironmentUpdate()
- } else {
- logWarning(s"The jar $path has been added already. Overwriting of added jars " +
- "is not supported in the current version.")
+ }
+ if (existed.nonEmpty) {
Review comment:
> Could you add tests to check if this warning message is shown only
once by using `LogAppender`?
Sure
##########
File path: core/src/test/scala/org/apache/spark/SparkContextSuite.scala
##########
@@ -955,6 +978,121 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext with Eventu
.set(EXECUTOR_ALLOW_SPARK_CONTEXT, true)).stop()
}
}
+
+ test("SPARK-33084: Add jar support ivy url -- default transitive = false") {
+ sc = new SparkContext(new
SparkConf().setAppName("test").setMaster("local-cluster[3, 1, 1024]"))
+ sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0")
+
assert(sc.listJars().exists(_.contains("org.apache.hive_hive-storage-api-2.7.0.jar")))
+
assert(!sc.listJars().exists(_.contains("commons-lang_commons-lang-2.6.jar")))
+
+ sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0?transitive=true")
+
assert(sc.listJars().exists(_.contains("commons-lang_commons-lang-2.6.jar")))
+ }
+
+ test("SPARK-33084: Add jar support ivy url -- invalid transitive use default
false") {
+ sc = new SparkContext(new
SparkConf().setAppName("test").setMaster("local-cluster[3, 1, 1024]"))
+ sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0?transitive=foo")
+
assert(sc.listJars().exists(_.contains("org.apache.hive_hive-storage-api-2.7.0.jar")))
+ assert(!sc.listJars().exists(_.contains("org.slf4j_slf4j-api-1.7.10.jar")))
+
assert(!sc.listJars().exists(_.contains("commons-lang_commons-lang-2.6.jar")))
+ }
+
+ test("SPARK-33084: Add jar support ivy url -- transitive=true will download
dependency jars") {
+ sc = new SparkContext(new
SparkConf().setAppName("test").setMaster("local-cluster[3, 1, 1024]"))
+ sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0?transitive=true")
+
assert(sc.listJars().exists(_.contains("org.apache.hive_hive-storage-api-2.7.0.jar")))
+ assert(sc.listJars().exists(_.contains("org.slf4j_slf4j-api-1.7.10.jar")))
+
assert(sc.listJars().exists(_.contains("commons-lang_commons-lang-2.6.jar")))
+ }
+
+ test("SPARK-33084: Add jar support ivy url -- test exclude param when
transitive=true") {
+ sc = new SparkContext(new
SparkConf().setAppName("test").setMaster("local-cluster[3, 1, 1024]"))
+ sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0" +
+ "?exclude=commons-lang:commons-lang&transitive=true")
+
assert(sc.listJars().exists(_.contains("org.apache.hive_hive-storage-api-2.7.0.jar")))
+ assert(sc.listJars().exists(_.contains("org.slf4j_slf4j-api-1.7.10.jar")))
+
assert(!sc.listJars().exists(_.contains("commons-lang_commons-lang-2.6.jar")))
+ }
+
+ test("SPARK-33084: Add jar support ivy url -- test different version") {
+ sc = new SparkContext(new
SparkConf().setAppName("test").setMaster("local-cluster[3, 1, 1024]"))
+ sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0")
+
assert(sc.listJars().exists(_.contains("org.apache.hive_hive-storage-api-2.7.0.jar")))
+ sc.addJar("ivy://org.apache.hive:hive-storage-api:2.6.0")
+
assert(sc.listJars().exists(_.contains("org.apache.hive_hive-storage-api-2.6.0.jar")))
+ }
+
+ test("SPARK-33084: Add jar support ivy url -- test invalid param") {
+ sc = new SparkContext(new
SparkConf().setAppName("test").setMaster("local-cluster[3, 1, 1024]"))
+ sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0?invalidParam=foo")
Review comment:
> https://github.com/apache/spark/pull/29966/files#r533579040
> If we show a warning message for this case, we need tests for that.
Done
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]