Github user gaborgsomogyi commented on a diff in the pull request:
https://github.com/apache/spark/pull/20853#discussion_r175544492
--- Diff: core/src/main/scala/org/apache/spark/deploy/DependencyUtils.scala
---
@@ -137,16 +137,29 @@ private[deploy] object DependencyUtils {
def resolveGlobPaths(paths: String, hadoopConf: Configuration): String =
{
require(paths != null, "paths cannot be null.")
Utils.stringToSeq(paths).flatMap { path =>
- val uri = Utils.resolveURI(path)
- uri.getScheme match {
- case "local" | "http" | "https" | "ftp" => Array(path)
- case _ =>
- val fs = FileSystem.get(uri, hadoopConf)
- Option(fs.globStatus(new Path(uri))).map { status =>
- status.filter(_.isFile).map(_.getPath.toUri.toString)
- }.getOrElse(Array(path))
+ val spath = path.split('#')
+ val renameAs = if (spath.length > 1) Some(spath(1)) else None
+ val resolved: Array[String] = resoloveGlobPath(spath(0), hadoopConf)
+ resolved match {
+ case array: Array[String] if !renameAs.isEmpty && array.length>1 =>
+ throw new SparkException(
+ s"${spath(1)} resolves ambiguously to multiple files:
${array.mkString(",")}")
+ case array: Array[String] if !renameAs.isEmpty => array.map( _ +
"#" + renameAs.get)
--- End diff --
Maybe we can find a more meaningful name for `array`; the current name makes
the code hard to read.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]