Github user vanzin commented on a diff in the pull request:
https://github.com/apache/spark/pull/19130#discussion_r138980102
--- Diff: core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala ---
@@ -367,6 +368,54 @@ object SparkSubmit extends CommandLineUtils with
Logging {
}.orNull
}
+ // When running in YARN, for some remote resources with scheme:
+ // 1. Hadoop FileSystem doesn't support them.
+ // 2. We explicitly bypass Hadoop FileSystem with
"spark.yarn.dist.forceDownloadSchemes".
+ // We will download them to local disk prior to add to YARN's
distributed cache.
+ // For yarn client mode, since we already download them with above
code, so we only need to
+ // figure out the local path and replace the remote one.
+ if (clusterManager == YARN) {
+ sparkConf.setIfMissing(SecurityManager.SPARK_AUTH_SECRET_CONF,
"unused")
+ val secMgr = new SecurityManager(sparkConf)
+ val forceDownloadSchemes = sparkConf.get(FORCE_DOWNLOAD_SCHEMES)
+
+ def shouldDownload(scheme: String): Boolean = {
+ val isFsAvailable = () => {
+ Try { FileSystem.getFileSystemClass(scheme, hadoopConf)
}.isSuccess
+ }
+ forceDownloadSchemes.contains(scheme) || !isFsAvailable()
+ }
+
+ def downloadResource(resource: String): String = {
+ val uri = Utils.resolveURI(resource)
+ uri.getScheme match {
+ case "local" | "file" => resource
+ case e if shouldDownload(e) =>
+ val file = new File(targetDir, new Path(uri).getName)
+ if (file.exists()) {
+ file.toURI.toString
+ } else {
+ downloadFile(resource, targetDir, sparkConf, hadoopConf,
secMgr)
+ }
+ case _ => uri.toString
+ }
+ }
+
+ args.primaryResource = Option(args.primaryResource).map {
downloadResource }.orNull
+ args.files = Option(args.files).map { files =>
+ files.split(",").map(_.trim).filter(_.nonEmpty).map {
downloadResource }.mkString(",")
--- End diff --
Code like this (breaking a comma-separated string into a list) is copied and
pasted in so many places that it probably deserves a method in `Utils`.
There's one in `ConfigHelpers.stringToSeq`, but that class is private to its
package.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]