[GitHub] spark pull request: SPARK-5087. [YARN] Merge yarn.Client and yarn....

andrewor14 Wed, 07 Jan 2015 07:43:02 -0800

Github user andrewor14 commented on a diff in the pull request:

    https://github.com/apache/spark/pull/3896#discussion_r22592523
  
    --- Diff: yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala ---
    @@ -105,25 +127,498 @@ private[spark] class Client(
       }
     
       /** Set up security tokens for launching our ApplicationMaster 
container. */
    -  override def setupSecurityToken(amContainer: ContainerLaunchContext): 
Unit = {
    +  private def setupSecurityToken(amContainer: ContainerLaunchContext): 
Unit = {
         val dob = new DataOutputBuffer
         credentials.writeTokenStorageToStream(dob)
         amContainer.setTokens(ByteBuffer.wrap(dob.getData))
       }
     
       /** Get the application report from the ResourceManager for an 
application we have submitted. */
    -  override def getApplicationReport(appId: ApplicationId): 
ApplicationReport =
    +  def getApplicationReport(appId: ApplicationId): ApplicationReport =
         yarnClient.getApplicationReport(appId)
     
       /**
        * Return the security token used by this client to communicate with the 
ApplicationMaster.
        * If no security is enabled, the token returned by the report is null.
        */
    -  override def getClientToken(report: ApplicationReport): String =
    +  private def getClientToken(report: ApplicationReport): String =
         Option(report.getClientToAMToken).map(_.toString).getOrElse("")
    +
    +  /**
    +   * Fail fast if we have requested more resources per container than is 
available in the cluster.
    +   */
    +  private def verifyClusterResources(newAppResponse: 
GetNewApplicationResponse): Unit = {
    +    val maxMem = newAppResponse.getMaximumResourceCapability().getMemory()
    +    logInfo("Verifying our application has not requested more than the 
maximum " +
    +      s"memory capability of the cluster ($maxMem MB per container)")
    +    val executorMem = args.executorMemory + executorMemoryOverhead
    +    if (executorMem > maxMem) {
    +      throw new IllegalArgumentException(s"Required executor memory 
(${args.executorMemory}" +
    +        s"+$executorMemoryOverhead MB) is above the max threshold ($maxMem 
MB) of this cluster!")
    +    }
    +    val amMem = args.amMemory + amMemoryOverhead
    +    if (amMem > maxMem) {
    +      throw new IllegalArgumentException(s"Required AM memory 
(${args.amMemory}" +
    +        s"+$amMemoryOverhead MB) is above the max threshold ($maxMem MB) 
of this cluster!")
    +    }
    +    logInfo("Will allocate AM container, with %d MB memory including %d MB 
overhead".format(
    +      amMem,
    +      amMemoryOverhead))
    +
    +    // We could add checks to make sure the entire cluster has enough 
resources but that involves
    +    // getting all the node reports and computing ourselves.
    +  }
    +
    +  /**
    +   * Copy the given file to a remote file system (e.g. HDFS) if needed.
    +   * The file is only copied if the source and destination file systems 
are different. This is used
    +   * for preparing resources for launching the ApplicationMaster 
container. Exposed for testing.
    +   */
    +  def copyFileToRemote(
    +      destDir: Path,
    +      srcPath: Path,
    +      replication: Short,
    +      setPerms: Boolean = false): Path = {
    +    val destFs = destDir.getFileSystem(hadoopConf)
    +    val srcFs = srcPath.getFileSystem(hadoopConf)
    +    var destPath = srcPath
    +    if (!compareFs(srcFs, destFs)) {
    +      destPath = new Path(destDir, srcPath.getName())
    +      logInfo(s"Uploading resource $srcPath -> $destPath")
    +      FileUtil.copy(srcFs, srcPath, destFs, destPath, false, hadoopConf)
    +      destFs.setReplication(destPath, replication)
    +      if (setPerms) {
    +        destFs.setPermission(destPath, new 
FsPermission(APP_FILE_PERMISSION))
    +      }
    +    } else {
    +      logInfo(s"Source and destination file systems are the same. Not 
copying $srcPath")
    +    }
    +    // Resolve any symlinks in the URI path so using a "current" symlink 
to point to a specific
    +    // version shows the specific version in the distributed cache 
configuration
    +    val qualifiedDestPath = destFs.makeQualified(destPath)
    +    val fc = FileContext.getFileContext(qualifiedDestPath.toUri(), 
hadoopConf)
    +    fc.resolvePath(qualifiedDestPath)
    +  }
    +
    +  /**
    +   * Given a local URI, resolve it and return a qualified local path that 
corresponds to the URI.
    +   * This is used for preparing local resources to be included in the 
container launch context.
    +   */
    +  private def getQualifiedLocalPath(localURI: URI): Path = {
    +    val qualifiedURI =
    +      if (localURI.getScheme == null) {
    +        // If not specified, assume this is in the local filesystem to 
keep the behavior
    +        // consistent with that of Hadoop
    +        new URI(FileSystem.getLocal(hadoopConf).makeQualified(new 
Path(localURI)).toString)
    +      } else {
    +        localURI
    +      }
    +    new Path(qualifiedURI)
    +  }
    +
    +  /**
    +   * Upload any resources to the distributed cache if needed. If a 
resource is intended to be
    +   * consumed locally, set up the appropriate config for downstream code 
to handle it properly.
    +   * This is used for setting up a container launch context for our 
ApplicationMaster.
    +   * Exposed for testing.
    +   */
    +  def prepareLocalResources(appStagingDir: String): HashMap[String, 
LocalResource] = {
    +    logInfo("Preparing resources for our AM container")
    +    // Upload Spark and the application JAR to the remote file system if 
necessary,
    +    // and add them as local resources to the application master.
    +    val fs = FileSystem.get(hadoopConf)
    +    val dst = new Path(fs.getHomeDirectory(), appStagingDir)
    +    val nns = getNameNodesToAccess(sparkConf) + dst
    +    obtainTokensForNamenodes(nns, hadoopConf, credentials)
    +
    +    val replication = 
sparkConf.getInt("spark.yarn.submit.file.replication",
    +      fs.getDefaultReplication(dst)).toShort
    +    val localResources = HashMap[String, LocalResource]()
    +    FileSystem.mkdirs(fs, dst, new FsPermission(STAGING_DIR_PERMISSION))
    +
    +    val statCache: Map[URI, FileStatus] = HashMap[URI, FileStatus]()
    +
    +    val oldLog4jConf = Option(System.getenv("SPARK_LOG4J_CONF"))
    +    if (oldLog4jConf.isDefined) {
    +      logWarning(
    +        "SPARK_LOG4J_CONF detected in the system environment. This 
variable has been " +
    +          "deprecated. Please refer to the \"Launching Spark on YARN\" 
documentation " +
    +          "for alternatives.")
    +    }
    +
    +    /**
    +     * Copy the given main resource to the distributed cache if the scheme 
is not "local".
    +     * Otherwise, set the corresponding key in our SparkConf to handle it 
downstream.
    +     * Each resource is represented by a 4-tuple of:
    +     *   (1) destination resource name,
    +     *   (2) local path to the resource,
    +     *   (3) Spark property key to set if the scheme is not local, and
    +     *   (4) whether to set permissions for this resource
    +     */
    +    List(
    +      (SPARK_JAR, sparkJar(sparkConf), CONF_SPARK_JAR, false),
    +      (APP_JAR, args.userJar, CONF_SPARK_USER_JAR, true),
    +      ("log4j.properties", oldLog4jConf.orNull, null, false)
    +    ).foreach { case (destName, _localPath, confKey, setPermissions) =>
    +      val localPath: String = if (_localPath != null) _localPath.trim() 
else ""
    +      if (!localPath.isEmpty()) {
    +        val localURI = new URI(localPath)
    +        if (localURI.getScheme != LOCAL_SCHEME) {
    +          val src = getQualifiedLocalPath(localURI)
    +          val destPath = copyFileToRemote(dst, src, replication, 
setPermissions)
    +          val destFs = FileSystem.get(destPath.toUri(), hadoopConf)
    +          distCacheMgr.addResource(destFs, hadoopConf, destPath,
    +            localResources, LocalResourceType.FILE, destName, statCache)
    +        } else if (confKey != null) {
    +          // If the resource is intended for local use only, handle this 
downstream
    +          // by setting the appropriate property
    +          sparkConf.set(confKey, localPath)
    +        }
    +      }
    +    }
    +
    +    /**
    +     * Do the same for any additional resources passed in through 
ClientArguments.
    +     * Each resource category is represented by a 3-tuple of:
    +     *   (1) comma separated list of resources in this category,
    +     *   (2) resource type, and
    +     *   (3) whether to add these resources to the classpath
    +     */
    +    val cachedSecondaryJarLinks = ListBuffer.empty[String]
    +    List(
    +      (args.addJars, LocalResourceType.FILE, true),
    +      (args.files, LocalResourceType.FILE, false),
    +      (args.archives, LocalResourceType.ARCHIVE, false)
    +    ).foreach { case (flist, resType, addToClasspath) =>
    +      if (flist != null && !flist.isEmpty()) {
    +        flist.split(',').foreach { file =>
    +          val localURI = new URI(file.trim())
    +          if (localURI.getScheme != LOCAL_SCHEME) {
    +            val localPath = new Path(localURI)
    +            val linkname = 
Option(localURI.getFragment()).getOrElse(localPath.getName())
    +            val destPath = copyFileToRemote(dst, localPath, replication)
    +            distCacheMgr.addResource(
    +              fs, hadoopConf, destPath, localResources, resType, linkname, 
statCache)
    +            if (addToClasspath) {
    +              cachedSecondaryJarLinks += linkname
    +            }
    +          } else if (addToClasspath) {
    +            // Resource is intended for local use only and should be added 
to the class path
    +            cachedSecondaryJarLinks += file.trim()
    +          }
    +        }
    +      }
    +    }
    +    if (cachedSecondaryJarLinks.nonEmpty) {
    +      sparkConf.set(CONF_SPARK_YARN_SECONDARY_JARS, 
cachedSecondaryJarLinks.mkString(","))
    +    }
    +
    +    localResources
    +  }
    +
    +  /**
    +   * Set up the environment for launching our ApplicationMaster container.
    +   */
    +  private def setupLaunchEnv(stagingDir: String): HashMap[String, String] 
= {
    +    logInfo("Setting up the launch environment for our AM container")
    +    val env = new HashMap[String, String]()
    +    val extraCp = sparkConf.getOption("spark.driver.extraClassPath")
    +    populateClasspath(args, yarnConf, sparkConf, env, extraCp)
    +    env("SPARK_YARN_MODE") = "true"
    +    env("SPARK_YARN_STAGING_DIR") = stagingDir
    +    env("SPARK_USER") = 
UserGroupInformation.getCurrentUser().getShortUserName()
    +
    +    // Set the environment variables to be passed on to the executors.
    +    distCacheMgr.setDistFilesEnv(env)
    +    distCacheMgr.setDistArchivesEnv(env)
    +
    +    // Pick up any environment variables for the AM provided through 
spark.yarn.appMasterEnv.*
    +    val amEnvPrefix = "spark.yarn.appMasterEnv."
    +    sparkConf.getAll
    +      .filter { case (k, v) => k.startsWith(amEnvPrefix) }
    +      .map { case (k, v) => (k.substring(amEnvPrefix.length), v) }
    +      .foreach { case (k, v) => 
YarnSparkHadoopUtil.addPathToEnvironment(env, k, v) }
    +
    +    // Keep this for backwards compatibility but users should move to the 
config
    +    sys.env.get("SPARK_YARN_USER_ENV").foreach { userEnvs =>
    +    // Allow users to specify some environment variables.
    +      YarnSparkHadoopUtil.setEnvFromInputString(env, userEnvs)
    +      // Pass SPARK_YARN_USER_ENV itself to the AM so it can use it to set 
up executor environments.
    +      env("SPARK_YARN_USER_ENV") = userEnvs
    +    }
    +
    +    // In cluster mode, if the deprecated SPARK_JAVA_OPTS is set, we need 
to propagate it to
    +    // executors. But we can't just set spark.executor.extraJavaOptions, 
because the driver's
    +    // SparkContext will not let that set spark* system properties, which 
is expected behavior for
    +    // Yarn clients. So propagate it through the environment.
    +    //
    +    // Note that to warn the user about the deprecation in cluster mode, 
some code from
    +    // SparkConf#validateSettings() is duplicated here (to avoid 
triggering the condition
    +    // described above).
    +    if (isLaunchingDriver) {
    +      sys.env.get("SPARK_JAVA_OPTS").foreach { value =>
    +        val warning =
    +          s"""
    +            |SPARK_JAVA_OPTS was detected (set to '$value').
    +            |This is deprecated in Spark 1.0+.
    +            |
    +            |Please instead use:
    +            | - ./spark-submit with conf/spark-defaults.conf to set 
defaults for an application
    +            | - ./spark-submit with --driver-java-options to set -X 
options for a driver
    +            | - spark.executor.extraJavaOptions to set -X options for 
executors
    +          """.stripMargin
    +        logWarning(warning)
    +        for (proc <- Seq("driver", "executor")) {
    +          val key = s"spark.$proc.extraJavaOptions"
    +          if (sparkConf.contains(key)) {
    +            throw new SparkException(s"Found both $key and 
SPARK_JAVA_OPTS. Use only the former.")
    +          }
    +        }
    +        env("SPARK_JAVA_OPTS") = value
    +      }
    +    }
    +
    +    env
    +  }
    +
    +  /**
    +   * Set up a ContainerLaunchContext to launch our ApplicationMaster 
container.
    +   * This sets up the launch environment, java options, and the command 
for launching the AM.
    +   */
    +  private def createContainerLaunchContext(newAppResponse: 
GetNewApplicationResponse)
    +    : ContainerLaunchContext = {
    --- End diff --
    
    bump 1 line?



---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[GitHub] spark pull request: SPARK-5087. [YARN] Merge yarn.Client and yarn....

Reply via email to