Github user vanzin commented on a diff in the pull request:

    https://github.com/apache/spark/pull/19130#discussion_r138980102
  
    --- Diff: core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala ---
    @@ -367,6 +368,54 @@ object SparkSubmit extends CommandLineUtils with 
Logging {
           }.orNull
         }
     
    +    // When running in YARN, for some remote resources with scheme:
    +    //   1. Hadoop FileSystem doesn't support them.
    +    //   2. We explicitly bypass Hadoop FileSystem with 
"spark.yarn.dist.forceDownloadSchemes".
    +    // We will download them to local disk prior to add to YARN's 
distributed cache.
    +    // For yarn client mode, since we already download them with above 
code, so we only need to
    +    // figure out the local path and replace the remote one.
    +    if (clusterManager == YARN) {
    +      sparkConf.setIfMissing(SecurityManager.SPARK_AUTH_SECRET_CONF, 
"unused")
    +      val secMgr = new SecurityManager(sparkConf)
    +      val forceDownloadSchemes = sparkConf.get(FORCE_DOWNLOAD_SCHEMES)
    +
    +      def shouldDownload(scheme: String): Boolean = {
    +        val isFsAvailable = () => {
    +          Try { FileSystem.getFileSystemClass(scheme, hadoopConf) 
}.isSuccess
    +        }
    +        forceDownloadSchemes.contains(scheme) || !isFsAvailable()
    +      }
    +
    +      def downloadResource(resource: String): String = {
    +        val uri = Utils.resolveURI(resource)
    +        uri.getScheme match {
    +          case "local" | "file" => resource
    +          case e if shouldDownload(e) =>
    +            val file = new File(targetDir, new Path(uri).getName)
    +            if (file.exists()) {
    +              file.toURI.toString
    +            } else {
    +              downloadFile(resource, targetDir, sparkConf, hadoopConf, 
secMgr)
    +            }
    +          case _ => uri.toString
    +        }
    +      }
    +
    +      args.primaryResource = Option(args.primaryResource).map { 
downloadResource }.orNull
    +      args.files = Option(args.files).map { files =>
    +        files.split(",").map(_.trim).filter(_.nonEmpty).map { 
downloadResource }.mkString(",")
    --- End diff --
    
    Code like this (breaking a comma-separated string into a list) is copy & pasted 
in so many places that it probably deserves a method in `Utils`.
    
    There's one in `ConfigHelpers.stringToSeq` but that class is private to its 
package.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to