Github user vanzin commented on a diff in the pull request:
https://github.com/apache/spark/pull/20853#discussion_r176152842
--- Diff: core/src/main/scala/org/apache/spark/deploy/DependencyUtils.scala ---
@@ -137,16 +138,32 @@ private[deploy] object DependencyUtils {
   def resolveGlobPaths(paths: String, hadoopConf: Configuration): String = {
     require(paths != null, "paths cannot be null.")
     Utils.stringToSeq(paths).flatMap { path =>
-      val uri = Utils.resolveURI(path)
-      uri.getScheme match {
-        case "local" | "http" | "https" | "ftp" => Array(path)
-        case _ =>
-          val fs = FileSystem.get(uri, hadoopConf)
-          Option(fs.globStatus(new Path(uri))).map { status =>
-            status.filter(_.isFile).map(_.getPath.toUri.toString)
-          }.getOrElse(Array(path))
+      val (base, fragment) = splitOnFragment(path)
+      (resolveGlobPath(base, hadoopConf), fragment) match {
+        case (resolved, Some(_)) if resolved.length > 1 => throw new SparkException(
+          s"${base.toString} resolves ambiguously to multiple files: ${resolved.mkString(",")}")
+        case (resolved, Some(namedAs)) => resolved.map( _ + "#" + namedAs)
+        case (resolved, _) => resolved
       }
     }.mkString(",")
   }
+
+  private def splitOnFragment(path: String): (URI, Option[String]) = {
+    val uri = Utils.resolveURI(path)
+    val withoutFragment = new URI(uri.getScheme, uri.getSchemeSpecificPart, null)
+    val fragment = if (uri.getFragment != null) Some(uri.getFragment) else None
--- End diff ---
`Option(uri.getFragment)`
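
As a rough sketch, with that change the helper could also drop the intermediate `val`. This assumes the method simply returns the `(uri-without-fragment, fragment)` pair, which the quoted diff cuts off before showing:

```scala
private def splitOnFragment(path: String): (URI, Option[String]) = {
  val uri = Utils.resolveURI(path)
  // Rebuild the URI without its fragment so glob resolution only sees the actual path.
  val withoutFragment = new URI(uri.getScheme, uri.getSchemeSpecificPart, null)
  // Option(...) maps a null fragment to None, replacing the explicit null check.
  (withoutFragment, Option(uri.getFragment))
}
```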