GitHub user tgravescs commented on a diff in the pull request:
https://github.com/apache/spark/pull/2350#discussion_r17426754
--- Diff: yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala ---
@@ -200,73 +150,85 @@ trait ClientBase extends Logging {
           "for alternatives.")
     }

+    /**
+     * Copy the given main resource to the distributed cache if the scheme is not "local".
+     * Otherwise, set the corresponding key in our SparkConf to handle it downstream.
+     * Each resource is represented by a 4-tuple of:
+     *   (1) destination resource name,
+     *   (2) local path to the resource,
+     *   (3) Spark property key to set if the scheme is not local, and
+     *   (4) whether to set permissions for this resource
+     */
     List(
-      (ClientBase.SPARK_JAR, ClientBase.sparkJar(sparkConf), ClientBase.CONF_SPARK_JAR),
-      (ClientBase.APP_JAR, args.userJar, ClientBase.CONF_SPARK_USER_JAR),
-      ("log4j.properties", oldLog4jConf.getOrElse(null), null)
-    ).foreach { case(destName, _localPath, confKey) =>
+      (SPARK_JAR, sparkJar(sparkConf), CONF_SPARK_JAR, false),
+      (APP_JAR, args.userJar, CONF_SPARK_USER_JAR, true),
+      ("log4j.properties", oldLog4jConf.orNull, null, false)
+    ).foreach { case (destName, _localPath, confKey, setPermissions) =>
       val localPath: String = if (_localPath != null) _localPath.trim() else ""
-      if (! localPath.isEmpty()) {
+      if (!localPath.isEmpty()) {
         val localURI = new URI(localPath)
-        if (!ClientBase.LOCAL_SCHEME.equals(localURI.getScheme())) {
-          val setPermissions = destName.equals(ClientBase.APP_JAR)
-          val destPath = copyRemoteFile(dst, qualifyForLocal(localURI), replication, setPermissions)
-          val destFs = FileSystem.get(destPath.toUri(), conf)
-          distCacheMgr.addResource(destFs, conf, destPath, localResources, LocalResourceType.FILE,
-            destName, statCache)
+        if (localURI.getScheme != LOCAL_SCHEME) {
+          val src = getQualifiedLocalPath(localURI)
+          val destPath = copyFileToRemote(dst, src, replication, setPermissions)
+          val destFs = FileSystem.get(destPath.toUri(), hadoopConf)
+          distCacheMgr.addResource(destFs, hadoopConf, destPath,
+            localResources, LocalResourceType.FILE, destName, statCache)
         } else if (confKey != null) {
+          // If the resource is intended for local use only, handle this downstream
+          // by setting the appropriate property
           sparkConf.set(confKey, localPath)
         }
       }
     }

+    /**
+     * Do the same for any additional resources passed in through ClientArguments.
+     * Each resource category is represented by a 3-tuple of:
+     *   (1) comma separated list of resources in this category,
+     *   (2) resource type, and
+     *   (3) whether to add these resources to the classpath
+     */
     val cachedSecondaryJarLinks = ListBuffer.empty[String]
-    val fileLists = List( (args.addJars, LocalResourceType.FILE, true),
+    List(
+      (args.addJars, LocalResourceType.FILE, true),
       (args.files, LocalResourceType.FILE, false),
-      (args.archives, LocalResourceType.ARCHIVE, false) )
-    fileLists.foreach { case (flist, resType, addToClasspath) =>
+      (args.archives, LocalResourceType.ARCHIVE, false)
+    ).foreach { case (flist, resType, addToClasspath) =>
       if (flist != null && !flist.isEmpty()) {
-        flist.split(',').foreach { case file: String =>
+        flist.split(',').foreach { file =>
           val localURI = new URI(file.trim())
-          if (!ClientBase.LOCAL_SCHEME.equals(localURI.getScheme())) {
+          if (localURI.getScheme != LOCAL_SCHEME) {
             val localPath = new Path(localURI)
             val linkname = Option(localURI.getFragment()).getOrElse(localPath.getName())
-            val destPath = copyRemoteFile(dst, localPath, replication)
-            distCacheMgr.addResource(fs, conf, destPath, localResources, resType,
-              linkname, statCache)
+            val destPath = copyFileToRemote(dst, localPath, replication)
+            distCacheMgr.addResource(
+              fs, hadoopConf, destPath, localResources, resType, linkname, statCache)
             if (addToClasspath) {
               cachedSecondaryJarLinks += linkname
             }
           } else if (addToClasspath) {
+            // Resource is intended for local use only and should be added to the class path
            cachedSecondaryJarLinks += file.trim()
          }
        }
      }
    }
-    logInfo("Prepared Local resources " + localResources)
-    sparkConf.set(ClientBase.CONF_SPARK_YARN_SECONDARY_JARS, cachedSecondaryJarLinks.mkString(","))
+    if (cachedSecondaryJarLinks.nonEmpty) {
+      sparkConf.set(CONF_SPARK_YARN_SECONDARY_JARS, cachedSecondaryJarLinks.mkString(","))
+    }

    UserGroupInformation.getCurrentUser().addCredentials(credentials)
--- End diff --
It was here because this is where the credentials get filled in (via
obtainTokensForNamenodes). It can be moved up into ContainerLaunchContext
after prepareLocalResources has been called. We should probably change it to
pass the credentials around rather than using the global, so that we can
unit test more easily.
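
For anyone following along, here is a rough sketch of what passing the
credentials around explicitly could look like. This is not the actual patch:
the helper name `obtainTokens` and its parameters are made up for
illustration; only `TokenCache.obtainTokensForNamenodes` is the real Hadoop
API.

```scala
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.hadoop.mapreduce.security.TokenCache
import org.apache.hadoop.security.Credentials

// Hypothetical helper: fill and return a Credentials instance instead of
// mutating UserGroupInformation's process-global current user. A unit test
// can then inspect or stub the returned Credentials directly.
def obtainTokens(paths: Seq[Path], hadoopConf: Configuration): Credentials = {
  val credentials = new Credentials()
  // obtainTokensForNamenodes adds delegation tokens for the given paths
  TokenCache.obtainTokensForNamenodes(credentials, paths.toArray, hadoopConf)
  credentials
}
```

The caller would then hand the same Credentials object to whatever builds the
ContainerLaunchContext, rather than each piece reaching for
UserGroupInformation.getCurrentUser().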