GitHub user gaborgsomogyi commented on a diff in the pull request:
https://github.com/apache/spark/pull/20853#discussion_r175585634
--- Diff: core/src/main/scala/org/apache/spark/deploy/DependencyUtils.scala
---
@@ -137,16 +138,36 @@ private[deploy] object DependencyUtils {
def resolveGlobPaths(paths: String, hadoopConf: Configuration): String =
{
require(paths != null, "paths cannot be null.")
Utils.stringToSeq(paths).flatMap { path =>
- val uri = Utils.resolveURI(path)
- uri.getScheme match {
- case "local" | "http" | "https" | "ftp" => Array(path)
- case _ =>
- val fs = FileSystem.get(uri, hadoopConf)
- Option(fs.globStatus(new Path(uri))).map { status =>
- status.filter(_.isFile).map(_.getPath.toUri.toString)
- }.getOrElse(Array(path))
+ val (base, fragment) = splitOnFragment(Utils.resolveURI(path))
+ (resolveGlobPath(base, hadoopConf), fragment) match {
+ case (resolved: Array[String], Some(_)) if resolved.length > 1 =>
throw new SparkException(
+ s"${base.toString} resolves ambiguously to multiple files:
${resolved.mkString(",")}")
+ case (resolved: Array[String], Some(namedAs)) => resolved.map( _ +
"#" + namedAs)
--- End diff --
Same here.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]