maropu commented on a change in pull request #29966:
URL: https://github.com/apache/spark/pull/29966#discussion_r533942826



##########
File path: core/src/main/scala/org/apache/spark/util/DependencyUtils.scala
##########
@@ -15,22 +15,158 @@
  * limitations under the License.
  */
 
-package org.apache.spark.deploy
+package org.apache.spark.util
 
 import java.io.File
-import java.net.URI
+import java.net.{URI, URISyntaxException}
 
 import org.apache.commons.lang3.StringUtils
 import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.fs.{FileSystem, Path}
 
 import org.apache.spark.{SecurityManager, SparkConf, SparkException}
+import org.apache.spark.deploy.SparkSubmitUtils
 import org.apache.spark.internal.Logging
-import org.apache.spark.util.{MutableURLClassLoader, Utils}
 
-private[deploy] object DependencyUtils extends Logging {
+/**
+ * Plain holder for the Ivy-related settings used when resolving jar dependencies.
+ *
+ * When built by `DependencyUtils.getIvyProperties()`, each field carries the raw value of the
+ * matching JVM system property, or `null` when that property is not set.
+ *
+ * @param packagesExclusions value of `spark.jars.excludes`
+ * @param packages value of `spark.jars.packages`
+ * @param repositories value of `spark.jars.repositories`
+ * @param ivyRepoPath value of `spark.jars.ivy`
+ * @param ivySettingsPath value of `spark.jars.ivySettings`
+ */
+case class IvyProperties(
+    packagesExclusions: String,
+    packages: String,
+    repositories: String,
+    ivyRepoPath: String,
+    ivySettingsPath: String)
+
+private[spark] object DependencyUtils extends Logging {
+
+  /**
+   * Snapshot the Ivy-related `spark.jars.*` JVM system properties into an [[IvyProperties]].
+   *
+   * @return the current property values; a property that is not set is returned as `null`.
+   */
+  def getIvyProperties(): IvyProperties = {
+    // Unset properties are deliberately surfaced as null, not as an Option.
+    def prop(key: String): String = sys.props.get(key).orNull
+
+    IvyProperties(
+      packagesExclusions = prop("spark.jars.excludes"),
+      packages = prop("spark.jars.packages"),
+      repositories = prop("spark.jars.repositories"),
+      ivyRepoPath = prop("spark.jars.ivy"),
+      ivySettingsPath = prop("spark.jars.ivySettings"))
+  }
+
+  /**
+   * Parse URI query string's parameter value of `transitive` and `exclude`.
+   * Any other parameter is unsupported: it is reported in a single warning and then ignored.
+   *
+   * @param uri Ivy URI that needs to be downloaded.
+   * @return Tuple value of parameter `transitive` and `exclude` value.
+   *
+   *         1. transitive: whether to download dependency jars of the ivy URI, default value is
+   *            false and this parameter value is case-sensitive. Any value other than `true`
+   *            is treated as false. When given more than once, the last value is used.
+   *            Example: Input:  exclude=org.mortbay.jetty:jetty&transitive=true
+   *            Output:  true
+   *
+   *         2. exclude: comma separated exclusions to apply when resolving transitive
+   *            dependencies, consists of `group:module` pairs separated by commas.
+   *            Example: Input:  exclude=org.mortbay.jetty:jetty,org.eclipse.jetty:jetty-http
+   *            Output:  org.mortbay.jetty:jetty,org.eclipse.jetty:jetty-http
+   * @throws java.net.URISyntaxException if a query token is not a non-blank `key=value` pair,
+   *                                     or an `exclude` entry is not a non-blank `group:module`
+   *                                     pair.
+   */
+  private def parseQueryParams(uri: URI): (Boolean, String) = {
+    val uriQuery = uri.getQuery
+    if (uriQuery == null) {
+      (false, "")
+    } else {
+      val mapTokens = uriQuery.split("&").map(_.split("="))
+      if (mapTokens.exists(token =>
+        token.length != 2 || StringUtils.isBlank(token(0)) || StringUtils.isBlank(token(1)))) {
+        throw new URISyntaxException(uri.toString, s"Invalid query string: $uriQuery")
+      }
+      val groupedParams = mapTokens.map(kv => (kv(0), kv(1))).groupBy(_._1)
+
+      // Parse transitive parameters (e.g., transitive=true) in an ivy URL, default value is false
+      val transitive = groupedParams.get("transitive").exists { params =>
+        if (params.length > 1) {
+          logWarning("It's best to specify `transitive` parameter in ivy URL query only once." +
+            " If there are multiple `transitive` parameter, we will select the last one")
+        }
+        // `groupBy` never yields an empty group, so `last` is safe; the last occurrence wins.
+        params.last._2 == "true"
+      }
+
+      // Parse an excluded list (e.g., exclude=org.mortbay.jetty:jetty,org.eclipse.jetty:jetty-http)
+      // in an ivy URL. When download ivy URL jar, Spark won't download transitive jar
+      // in a excluded list.
+      val exclusionList = groupedParams.get("exclude").map { params =>
+        params.map(_._2).flatMap { excludeString =>
+          val excludes = excludeString.split(",")
+          if (excludes.map(_.split(":")).exists(token =>
+            token.length != 2 || StringUtils.isBlank(token(0)) || StringUtils.isBlank(token(1)))) {
+            throw new URISyntaxException(uri.toString, "Invalid exclude string: " +
+              "expected 'org:module,org:module,..', found " + excludeString)
+          }
+          excludes
+        }.mkString(",")
+      }.getOrElse("")
+
+      // Report every unsupported parameter name once, in a stable (sorted) order.
+      val invalidParams = groupedParams
+        .filter(entry => !Seq("transitive", "exclude").contains(entry._1))
+        .keys.toArray.sorted
+      if (invalidParams.nonEmpty) {
+        logWarning(
+          s"Invalid parameters `${invalidParams.mkString(",")}` found in URI query `$uriQuery`.")
+      }
+
+      (transitive, exclusionList)
+    }
+  }
+
+  /**
+   * Download Ivy URIs dependency jars.
+   *
+   * @param uri Ivy uri need to be downloaded. The URI format should be:
+   *              `ivy://group:module:version[?query]`
+   *            Ivy URI query part format should be:
+   *              `parameter=value&parameter=value...`
+   *            Note that currently ivy URI query part support two parameters:
+   *             1. transitive: whether to download dependent jars related to 
your ivy URL.
+   *                `transitive=false` or `transitive=true`; if not set, the default value is false.
+   *             2. exclude: exclusion list when download ivy URL jar and 
dependency jars.
+   *                The `exclude` parameter content is a ',' separated 
`group:module` pair string :
+   *                `exclude=group:module,group:module...`
+   * @return Sequence of the downloaded jars (the comma separated result split into entries).
+   */
+  def resolveMavenDependencies(uri: URI): Seq[String] = {
+    try {
+      val ivyProperties = DependencyUtils.getIvyProperties()
+      val authority = uri.getAuthority
+      if (authority == null) {
+        throw new URISyntaxException(
+          uri.toString, "Invalid url: Expected 'org:module:version', found 
null")
+      }
+      if (authority.split(":").length != 3) {
+        throw new URISyntaxException(
+          uri.toString, "Invalid url: Expected 'org:module:version', found " + 
authority)
+      }
+
+      val (transitive, exclusionList) = parseQueryParams(uri)
+
+      resolveMavenDependencies(
+        transitive,
+        exclusionList,
+        authority,
+        ivyProperties.repositories,
+        ivyProperties.ivyRepoPath,
+        Option(ivyProperties.ivySettingsPath)
+      ).split(",")
+    } catch {
+      case e: URISyntaxException =>

Review comment:
       Which library API throws this exception? Could you minimize the 
try-catch block?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to