Github user cloud-fan commented on a diff in the pull request:

    https://github.com/apache/spark/pull/19130#discussion_r139576095
  
    --- Diff: core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala ---
    @@ -367,6 +368,54 @@ object SparkSubmit extends CommandLineUtils with 
Logging {
           }.orNull
         }
     
    +    // When running in YARN, for some remote resources with schemes where:
    +    //   1. Hadoop FileSystem doesn't support them.
    +    //   2. We explicitly bypass Hadoop FileSystem with 
"spark.yarn.dist.forceDownloadSchemes".
    +    // We will download them to local disk prior to adding them to YARN's 
distributed cache.
    +    // For YARN client mode, since we have already downloaded them with 
the code above, we only need to
    +    // figure out the local path and replace the remote one.
    +    if (clusterManager == YARN) {
    +      sparkConf.setIfMissing(SecurityManager.SPARK_AUTH_SECRET_CONF, 
"unused")
    +      val secMgr = new SecurityManager(sparkConf)
    +      val forceDownloadSchemes = sparkConf.get(FORCE_DOWNLOAD_SCHEMES)
    +
    +      def shouldDownload(scheme: String): Boolean = {
    +        val isFsAvailable = () => {
    +          Try { FileSystem.getFileSystemClass(scheme, hadoopConf) 
}.isSuccess
    +        }
    +        forceDownloadSchemes.contains(scheme) || !isFsAvailable()
    +      }
    +
    +      def downloadResource(resource: String): String = {
    +        val uri = Utils.resolveURI(resource)
    +        uri.getScheme match {
    +          case "local" | "file" => resource
    +          case e if shouldDownload(e) =>
    --- End diff --
    
    shall we explicitly list "http" | "https" | "ftp"?


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to