Github user vanzin commented on a diff in the pull request:
https://github.com/apache/spark/pull/19130#discussion_r138689976
--- Diff: core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala ---
@@ -367,6 +368,53 @@ object SparkSubmit extends CommandLineUtils with Logging {
}.orNull
}
+ // When running in YARN cluster manager,
+ if (clusterManager == YARN) {
+ sparkConf.setIfMissing(SecurityManager.SPARK_AUTH_SECRET_CONF, "unused")
+ val secMgr = new SecurityManager(sparkConf)
+ val forceDownloadSchemes = sparkConf.get(FORCE_DOWNLOAD_SCHEMES)
+
+ // Check the scheme list provided by "spark.yarn.dist.forceDownloadSchemes" to see if current
+ // resource's scheme is included in this list, or Hadoop FileSystem doesn't support current
+ // scheme, if so Spark will download the resources to local disk and upload to Hadoop FS.
+ def shouldDownload(scheme: String): Boolean = {
+ val isFsAvailable = Try { FileSystem.getFileSystemClass(scheme, hadoopConf) }
+ .map(_ => true).getOrElse(false)
--- End diff --
`Try { ... }.isSuccess`? You could also avoid this call if the scheme is in
the blacklist.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]