Repository: tez Updated Branches: refs/heads/branch-0.7 24957c9f8 -> 60b1f6e65
TEZ-3240. Improvements to tez.lib.uris to allow for multiple tarballs and mixing tarballs and jars. (Eric Badger via jlowe) Project: http://git-wip-us.apache.org/repos/asf/tez/repo Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/60b1f6e6 Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/60b1f6e6 Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/60b1f6e6 Branch: refs/heads/branch-0.7 Commit: 60b1f6e656fecd2b70a71e8e8c3d0bd3f6e7f8e3 Parents: 24957c9 Author: Jason Lowe <[email protected]> Authored: Fri May 20 20:41:28 2016 +0000 Committer: Jason Lowe <[email protected]> Committed: Fri May 20 20:41:28 2016 +0000 ---------------------------------------------------------------------- CHANGES.txt | 1 + docs/src/site/markdown/install.md | 164 ++++++++++++++++--- .../org/apache/tez/client/TezClientUtils.java | 124 +++++++++----- .../org/apache/tez/common/TezYARNUtils.java | 55 ++++--- .../apache/tez/dag/api/TezConfiguration.java | 20 ++- .../apache/tez/client/TestTezClientUtils.java | 85 ++++++++++ .../org/apache/tez/common/TestTezYARNUtils.java | 11 ++ 7 files changed, 367 insertions(+), 93 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tez/blob/60b1f6e6/CHANGES.txt ---------------------------------------------------------------------- diff --git a/CHANGES.txt b/CHANGES.txt index 28b4936..533383e 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -6,6 +6,7 @@ Release 0.7.2 Unreleased INCOMPATIBLE CHANGES ALL CHANGES: + TEZ-3240. Improvements to tez.lib.uris to allow for multiple tarballs and mixing tarballs and jars. TEZ-3237. Corrupted shuffle transfers to disk are not detected during transfer TEZ-3258. Jvm Checker does not ignore DisableExplicitGC when checking JVM GC options. TEZ-3256. 
[Backport HADOOP-11032] Remove Guava Stopwatch dependency http://git-wip-us.apache.org/repos/asf/tez/blob/60b1f6e6/docs/src/site/markdown/install.md ---------------------------------------------------------------------- diff --git a/docs/src/site/markdown/install.md b/docs/src/site/markdown/install.md index 3854781..e9cdbf9 100644 --- a/docs/src/site/markdown/install.md +++ b/docs/src/site/markdown/install.md @@ -24,9 +24,11 @@ Replace x.y.z with the tez release number that you are using. E.g. 0.5.0 1. Deploy Apache Hadoop using either the 2.2.0 release or a compatible 2.x version. - You need to change the value of the hadoop.version property in the top-level pom.xml to match the version of the hadoop branch being used. - ``` - $ hadoop version - ``` + + ``` + $ hadoop version + ``` + 2. Build tez using `mvn clean package -DskipTests=true -Dmaven.javadoc.skip=true` - This assumes that you have already installed JDK6 or later and Maven 3 or later. - Tez also requires Protocol Buffers 2.5.0, including the protoc-compiler. @@ -50,16 +52,16 @@ Replace x.y.z with the tez release number that you are using. E.g. 0.5.0 at tez-dist/target/tez-x.y.z-SNAPSHOT.tar.gz - Assuming that the tez jars are put in /apps/ on HDFS, the command would be - ``` - hadoop dfs -mkdir /apps/tez-x.y.z-SNAPSHOT - hadoop dfs -copyFromLocal tez-dist/target/tez-x.y.z-SNAPSHOT-archive.tar.gz /apps/tez-x.y.z-SNAPSHOT/ - ``` + + ``` + hadoop dfs -mkdir /apps/tez-x.y.z-SNAPSHOT + hadoop dfs -copyFromLocal tez-dist/target/tez-x.y.z-SNAPSHOT-archive.tar.gz /apps/tez-x.y.z-SNAPSHOT/ + ``` + - tez-site.xml configuration. - Set tez.lib.uris to point to the tar.gz uploaded to HDFS. 
Assuming the steps mentioned so far were followed, - ``` - set tez.lib.uris to "${fs.defaultFS}/apps/tez-x.y.z-SNAPSHOT/tez-x.y.z-SNAPSHOT.tar.gz" - ``` + set tez.lib.uris to `${fs.defaultFS}/apps/tez-x.y.z-SNAPSHOT/tez-x.y.z-SNAPSHOT.tar.gz` - Ensure tez.use.cluster.hadoop-libs is not set in tez-site.xml, or if it is set, the value should be false - Please note that the tarball version should match the version of @@ -74,16 +76,20 @@ Replace x.y.z with the tez release number that you are using. E.g. 0.5.0 - Extract the tez minimal tarball created in step 2 to a local directory (assuming TEZ_JARS is where the files will be decompressed for the next steps) - ``` - tar -xvzf tez-dist/target/tez-x.y.z-minimal.tar.gz -C $TEZ_JARS - ``` + + ``` + tar -xvzf tez-dist/target/tez-x.y.z-minimal.tar.gz -C $TEZ_JARS + ``` + - set TEZ_CONF_DIR to the location of tez-site.xml - Add $TEZ_CONF_DIR, ${TEZ_JARS}/* and ${TEZ_JARS}/lib/* to the application classpath. For example, doing it via the standard Hadoop tool chain would use the following command to set up the application classpath: - ``` - export HADOOP_CLASSPATH=${TEZ_CONF_DIR}:${TEZ_JARS}/*:${TEZ_JARS}/lib/* - ``` + + ``` + export HADOOP_CLASSPATH=${TEZ_CONF_DIR}:${TEZ_JARS}/*:${TEZ_JARS}/lib/* + ``` + - Please note the "*" which is an important requirement when setting up classpaths for directories containing jar files. 6. There is a basic example of using an MRR job in the tez-examples.jar. @@ -125,22 +131,126 @@ Replace x.y.z with the tez release number that you are using. E.g. 0.5.0 can be verified by looking at the AM's logs from the YARN ResourceManager UI. This needs mapred-site.xml to have "mapreduce.framework.name" set to "yarn-tez" +Various ways to configure tez.lib.uris +--------------------------------------- + +The `tez.lib.uris` configuration property supports a comma-separated list of values. 
The +types of values supported are: + - Path to simple file + - Path to a directory + - Path to a compressed archive ( tarball, zip, etc). + +For simple files and directories, Tez will add all these files and first-level entries in the +directories (recursive traversal of dirs is not supported) into the working directory of the +Tez runtime and they will automatically be included into the classpath. Archives, i.e. +files whose names end with generally known compressed archive suffixes such as 'tgz', +'tar.gz', 'zip', etc., will be uncompressed into the container working directory too. However, +given that the archive structure is not known to the Tez framework, the user is expected to +configure `tez.lib.uris.classpath` to ensure that the nested directory structure of an +archive is added to the classpath. These classpath values should be relative, i.e. the entries +should start with "./". + Hadoop Installation dependent Install/Deploy Instructions --------------------------------------------------------- + The above install instructions use Tez with pre-packaged Hadoop libraries included in the package and is the -recommended method for installation. If its needed to make Tez use the existing cluster Hadoop libraries then -follow this alternate machanism to setup Tez to use Hadoop libraries from the cluster. -Step 3 above changes as follows. Also subsequent steps would use tez-dist/target/tez-x.y.z-minimal.tar.gz instead of tez-dist/target/tez-x.y.z.tar.gz -- A tez build without Hadoop dependencies will be available at tez-dist/target/tez-x.y.z-minimal.tar.gz -- Assuming that the tez jars are put in /apps/ on HDFS, the command would be -"hadoop fs -mkdir /apps/tez-x.y.z" -"hadoop fs -copyFromLocal tez-dist/target/tez-x.y.z-minimal.tar.gz /apps/tez-x.y.z" -- tez-site.xml configuration -- Set tez.lib.uris to point to the paths in HDFS containing the tez jars. 
Assuming the steps mentioned so far were followed, -set tez.lib.uris to "${fs.defaultFS}/apps/tez-x.y.z/tez-x.y.z-minimal.tar.gz -- set tez.use.cluster.hadoop-libs to true +recommended method for installation. A full tarball with all dependencies is a better approach to ensure +that existing jobs continue to run during a cluster's rolling upgrade. + +Although the `tez.lib.uris` configuration options enable a wide variety of usage patterns, there +are 2 main alternative modes that are supported by the framework: + +1. Mode A: Using a tez tarball on HDFS along with Hadoop libraries available on the cluster. +2. Mode B: Using a tez tarball along with the Hadoop tarball. + +Both these modes will require a tez build without Hadoop dependencies and that is available at +tez-dist/target/tez-x.y.z-minimal.tar.gz. + +For Mode A: Tez tarball with using existing cluster Hadoop libraries by leveraging yarn.application.classpath +------------------------------------------------------------------------------------------------------------- + +This mode is not recommended for clusters that use rolling upgrades. Additionally, it is the user's responsibility +to ensure that the tez version being used is compatible with the version of Hadoop running on the cluster. +Step 3 above changes as follows. Also subsequent steps should use tez-dist/target/tez-x.y.z-minimal.tar.gz +instead of tez-dist/target/tez-x.y.z.tar.gz + + - A tez build without Hadoop dependencies will be available at tez-dist/target/tez-x.y.z-minimal.tar.gz + Assuming that the tez jars are put in /apps/ on HDFS, the command would be + + ``` + "hadoop fs -mkdir /apps/tez-x.y.z" + "hadoop fs -copyFromLocal tez-dist/target/tez-x.y.z-minimal.tar.gz /apps/tez-x.y.z" + ``` + + - tez-site.xml configuration + - Set tez.lib.uris to point to the paths in HDFS containing the tez jars. 
Assuming the steps mentioned so far were followed, +set tez.lib.uris to `${fs.defaultFS}/apps/tez-x.y.z/tez-x.y.z-minimal.tar.gz` + - Set tez.use.cluster.hadoop-libs to true + +For Mode B: Tez tarball with Hadoop tarball +-------------------------------------------- + +This mode will support rolling upgrades. It is the user's responsibility to ensure that the +versions of Tez and Hadoop being used are compatible. +To do this configuration, we need to change Step 3 of the +default instructions in the following ways. + + - Assuming that the tez archives/jars are put in /apps/ on HDFS, the command to put this +minimal Tez archive into HDFS would be: + + ``` + "hadoop fs -mkdir /apps/tez-x.y.z" + "hadoop fs -copyFromLocal tez-dist/target/tez-x.y.z-minimal.tar.gz /apps/tez-x.y.z" + ``` + + - Alternatively, you can put the minimal directory directly into HDFS and + reference the jars, instead of using an archive. The command to put + the minimal directory into HDFS would be: + + ``` + "hadoop fs -copyFromLocal tez-dist/target/tez-x.y.z-minimal/* /apps/tez-x.y.z" + ``` + + - After building hadoop, the hadoop tarball will be available at + hadoop/hadoop-dist/target/hadoop-x.y.z-SNAPSHOT.tar.gz + - Assuming that the hadoop jars are put in /apps/ on HDFS, the command to put this + Hadoop archive into HDFS would be: + + ``` + "hadoop fs -mkdir /apps/hadoop-x.y.z" + "hadoop fs -copyFromLocal hadoop-dist/target/hadoop-x.y.z-SNAPSHOT.tar.gz /apps/hadoop-x.y.z" + ``` + + - tez-site.xml configuration + - Set tez.lib.uris to point to the archives and jars that are needed for Tez/Hadoop. 
+ + - Example: When using both Tez and Hadoop archives, set tez.lib.uris to + `${fs.defaultFS}/apps/tez-x.y.z/tez-x.y.z-minimal.tar.gz#tez,${fs.defaultFS}/apps/hadoop-x.y.z/hadoop-x.y.z-SNAPSHOT.tar.gz#hadoop-mapreduce` + + - Example: When using Tez jars with a Hadoop archive, set tez.lib.uris to: + `${fs.defaultFS}/apps/tez-x.y.z,${fs.defaultFS}/apps/tez-x.y.z/lib,${fs.defaultFS}/apps/hadoop-x.y.z/hadoop-x.y.z-SNAPSHOT.tar.gz#hadoop-mapreduce` + + - In tez.lib.uris, the text immediately following the '#' symbol is the fragment that + refers to the symlink that will be created for the archive. If no fragment is given, + the symlink will be set to the name of the archive. Fragments should not be given + to directories or jars. + + - If any archives are specified in tez.lib.uris, then tez.lib.uris.classpath must be set + to define the classpath for these archives as the archive structure is not known. + - Example: Classpath when using both Tez and Hadoop archives, set tez.lib.uris.classpath to: + + ``` +./tez/*:./tez/lib/*:./hadoop-mapreduce/hadoop-x.y.z-SNAPSHOT/share/hadoop/common/*:./hadoop-mapreduce/hadoop-x.y.z-SNAPSHOT/share/hadoop/common/lib/*:./hadoop-mapreduce/hadoop-x.y.z-SNAPSHOT/share/hadoop/hdfs/*:./hadoop-mapreduce/hadoop-x.y.z-SNAPSHOT/share/hadoop/hdfs/lib/*:./hadoop-mapreduce/hadoop-x.y.z-SNAPSHOT/share/hadoop/yarn/*:./hadoop-mapreduce/hadoop-x.y.z-SNAPSHOT/share/hadoop/yarn/lib/*:./hadoop-mapreduce/hadoop-x.y.z-SNAPSHOT/share/hadoop/mapreduce/*:./hadoop-mapreduce/hadoop-x.y.z-SNAPSHOT/share/hadoop/mapreduce/lib/* + ``` + + - Example: Classpath when using Tez jars with a Hadoop archive, set tez.lib.uris.classpath to: + + ``` 
+./hadoop-mapreduce/hadoop-x.y.z-SNAPSHOT/share/hadoop/common/*:./hadoop-mapreduce/hadoop-x.y.z-SNAPSHOT/share/hadoop/common/lib/*:./hadoop-mapreduce/hadoop-x.y.z-SNAPSHOT/share/hadoop/hdfs/*:./hadoop-mapreduce/hadoop-x.y.z-SNAPSHOT/share/hadoop/hdfs/lib/*:./hadoop-mapreduce/hadoop-x.y.z-SNAPSHOT/share/hadoop/yarn/*:./hadoop-mapreduce/hadoop-x.y.z-SNAPSHOT/share/hadoop/yarn/lib/*:./hadoop-mapreduce/hadoop-x.y.z-SNAPSHOT/share/hadoop/mapreduce/*:./hadoop-mapreduce/hadoop-x.y.z-SNAPSHOT/share/hadoop/mapreduce/lib/* + ``` [Install instructions for older versions of Tez (pre 0.5.0)](./install_pre_0_5_0.html) ----------------------------------------------------------------------------------- + http://git-wip-us.apache.org/repos/asf/tez/blob/60b1f6e6/tez-api/src/main/java/org/apache/tez/client/TezClientUtils.java ---------------------------------------------------------------------- diff --git a/tez-api/src/main/java/org/apache/tez/client/TezClientUtils.java b/tez-api/src/main/java/org/apache/tez/client/TezClientUtils.java index cf49c31..8a5a661 100644 --- a/tez-api/src/main/java/org/apache/tez/client/TezClientUtils.java +++ b/tez-api/src/main/java/org/apache/tez/client/TezClientUtils.java @@ -70,6 +70,7 @@ import org.apache.hadoop.yarn.api.records.LocalResource; import org.apache.hadoop.yarn.api.records.LocalResourceType; import org.apache.hadoop.yarn.api.records.LocalResourceVisibility; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.URL; import org.apache.hadoop.yarn.api.records.YarnApplicationState; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException; @@ -129,7 +130,8 @@ public class TezClientUtils { Path p = new Path(uri); FileSystem fs = p.getFileSystem(conf); - p = fs.resolvePath(p); + p = fs.resolvePath(p.makeQualified(fs.getUri(), + fs.getWorkingDirectory())); FileSystem targetFS = p.getFileSystem(conf); if (targetFS.isDirectory(p)) { return 
targetFS.listStatus(p); @@ -172,37 +174,12 @@ public class TezClientUtils { LOG.info("Using tez.lib.uris value from configuration: " + conf.get(TezConfiguration.TEZ_LIB_URIS)); + LOG.info("Using tez.lib.uris.classpath value from configuration: " + + conf.get(TezConfiguration.TEZ_LIB_URIS_CLASSPATH)); - if (tezJarUris.length == 1 && ( - tezJarUris[0].endsWith(".tar.gz") || - tezJarUris[0].endsWith(".tgz") || - tezJarUris[0].endsWith(".zip") || - tezJarUris[0].endsWith(".tar"))) { - String fileName = tezJarUris[0]; + usingTezArchive = addLocalResources(conf, tezJarUris, + tezJarResources, credentials); - FileStatus fStatus = getLRFileStatus(fileName, conf)[0]; - LocalResourceVisibility lrVisibility; - if (checkAncestorPermissionsForAllUsers(conf, fileName, FsAction.EXECUTE) && - fStatus.getPermission().getOtherAction().implies(FsAction.READ)) { - lrVisibility = LocalResourceVisibility.PUBLIC; - } else { - lrVisibility = LocalResourceVisibility.PRIVATE; - } - tezJarResources.put(TezConstants.TEZ_TAR_LR_NAME, - LocalResource.newInstance( - ConverterUtils.getYarnUrlFromPath(fStatus.getPath()), - LocalResourceType.ARCHIVE, - lrVisibility, - fStatus.getLen(), - fStatus.getModificationTime())); - Path[] tezJarPaths = { fStatus.getPath() }; - // obtain credentials - TokenCache.obtainTokensForFileSystems(credentials, tezJarPaths, conf); - usingTezArchive = true; - } else { // Treat as non-archives - addLocalResources(conf, tezJarUris, tezJarResources, credentials); - } - if (tezJarResources.isEmpty()) { throw new TezUncheckedException( "No files found in locations specified in " @@ -218,41 +195,101 @@ public class TezClientUtils { return usingTezArchive; } - private static void addLocalResources(Configuration conf, String[] configUris, - Map<String, LocalResource> tezJarResources, Credentials credentials) throws IOException { + private static boolean addLocalResources(Configuration conf, + String[] configUris, Map<String, LocalResource> tezJarResources, + Credentials 
credentials) throws IOException { + boolean usingTezArchive = false; if (configUris == null || configUris.length == 0) { - return; + return usingTezArchive; } List<Path> configuredPaths = Lists.newArrayListWithCapacity(configUris.length); for (String configUri : configUris) { - boolean ancestorsHavePermission = checkAncestorPermissionsForAllUsers(conf, configUri, - FsAction.EXECUTE); + URI u = null; + try { + u = new URI(configUri); + } catch (URISyntaxException e) { + throw new IOException("Unable to convert " + configUri + "to URI", e); + } + Path p = new Path(u); + FileSystem remoteFS = p.getFileSystem(conf); + p = remoteFS.resolvePath(p.makeQualified(remoteFS.getUri(), + remoteFS.getWorkingDirectory())); + + LocalResourceType type = null; + + //Check if path is an archive + if(p.getName().endsWith(".tar.gz") || + p.getName().endsWith(".tgz") || + p.getName().endsWith(".zip") || + p.getName().endsWith(".tar")) { + type = LocalResourceType.ARCHIVE; + } else { + type = LocalResourceType.FILE; + } + FileStatus [] fileStatuses = getLRFileStatus(configUri, conf); + for (FileStatus fStatus : fileStatuses) { + String linkName; if (fStatus.isDirectory()) { // Skip directories - no recursive search support. continue; } + // If the resource is an archive, we've already done this work + if(type != LocalResourceType.ARCHIVE) { + u = fStatus.getPath().toUri(); + p = new Path(u); + remoteFS = p.getFileSystem(conf); + p = remoteFS.resolvePath(p.makeQualified(remoteFS.getUri(), + remoteFS.getWorkingDirectory())); + if(null != u.getFragment()) { + LOG.warn("Fragment set for link being interpreted as a file," + + "URI: " + u.toString()); + } + } + + // Add URI fragment or just the filename + Path name = new Path((null == u.getFragment()) + ? 
p.getName() + : u.getFragment()); + if (name.isAbsolute()) { + throw new IllegalArgumentException("Resource name must be " + + "relative, not absolute: " + name + + " in URI: " + u.toString()); + } + + URL url = ConverterUtils.getYarnUrlFromURI(p.toUri()); + linkName = name.toUri().getPath(); + // For legacy reasons, set archive to tezlib if there is + // only a single archive and no fragment + if(type == LocalResourceType.ARCHIVE && + configUris.length == 1 && null == u.getFragment()) { + linkName = TezConstants.TEZ_TAR_LR_NAME; + usingTezArchive = true; + } + LocalResourceVisibility lrVisibility; - if (ancestorsHavePermission && + if (checkAncestorPermissionsForAllUsers(conf, url.getFile(), + FsAction.EXECUTE) && fStatus.getPermission().getOtherAction().implies(FsAction.READ)) { lrVisibility = LocalResourceVisibility.PUBLIC; } else { lrVisibility = LocalResourceVisibility.PRIVATE; } - String rsrcName = fStatus.getPath().getName(); - if (tezJarResources.containsKey(rsrcName)) { + + if (tezJarResources.containsKey(linkName)) { String message = "Duplicate resource found" - + ", resourceName=" + rsrcName + + ", resourceName=" + linkName + ", existingPath=" + - tezJarResources.get(rsrcName).getResource().toString() + tezJarResources.get(linkName).getResource().toString() + ", newPath=" + fStatus.getPath(); LOG.warn(message); } - tezJarResources.put(rsrcName, + + tezJarResources.put(linkName, LocalResource.newInstance( - ConverterUtils.getYarnUrlFromPath(fStatus.getPath()), - LocalResourceType.FILE, + url, + type, lrVisibility, fStatus.getLen(), fStatus.getModificationTime())); @@ -264,6 +301,7 @@ public class TezClientUtils { TokenCache.obtainTokensForFileSystems(credentials, configuredPaths.toArray(new Path[configuredPaths.size()]), conf); } + return usingTezArchive; } static void processTezLocalCredentialsFile(Credentials credentials, Configuration conf) 
http://git-wip-us.apache.org/repos/asf/tez/blob/60b1f6e6/tez-api/src/main/java/org/apache/tez/common/TezYARNUtils.java ---------------------------------------------------------------------- diff --git a/tez-api/src/main/java/org/apache/tez/common/TezYARNUtils.java b/tez-api/src/main/java/org/apache/tez/common/TezYARNUtils.java index d7093db..c505ca8 100644 --- a/tez-api/src/main/java/org/apache/tez/common/TezYARNUtils.java +++ b/tez-api/src/main/java/org/apache/tez/common/TezYARNUtils.java @@ -23,6 +23,8 @@ import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.util.Shell; @@ -35,6 +37,7 @@ import org.apache.tez.dag.api.TezConstants; @Private public class TezYARNUtils { + private static Logger LOG = LoggerFactory.getLogger(TezYARNUtils.class); private static Pattern ENV_VARIABLE_PATTERN = Pattern.compile(Shell.getEnvironmentVariableRegex()); @@ -54,27 +57,41 @@ public class TezYARNUtils { .append(Environment.PWD.$() + File.separator + "*") .append(File.pathSeparator); - // Next add the tez libs, if specified via an archive. 
- if (usingArchive) { - // Add PWD/tezlib/* - classpathBuilder.append(Environment.PWD.$()) - .append(File.separator) - .append(TezConstants.TEZ_TAR_LR_NAME) - .append(File.separator) - .append("*") - .append(File.pathSeparator); + String [] tezLibUrisClassPath = conf.getStrings(TezConfiguration.TEZ_LIB_URIS_CLASSPATH); - // Add PWD/tezlib/lib/* - classpathBuilder.append(Environment.PWD.$()) - .append(File.separator) - .append(TezConstants.TEZ_TAR_LR_NAME) - .append(File.separator) - .append("lib") - .append(File.separator) - .append("*") - .append(File.pathSeparator); - } + if(!conf.getBoolean(TezConfiguration.TEZ_IGNORE_LIB_URIS, false) && + tezLibUrisClassPath != null && tezLibUrisClassPath.length != 0) { + for(String c : tezLibUrisClassPath) { + classpathBuilder.append(c.trim()) + .append(File.pathSeparator); + } + } else { + if(conf.getBoolean(TezConfiguration.TEZ_IGNORE_LIB_URIS, false)) { + LOG.info("Ignoring '" + TezConfiguration.TEZ_LIB_URIS + "' since '" + + TezConfiguration.TEZ_IGNORE_LIB_URIS + "' is set to true "); + } + + // Legacy: Next add the tez libs, if specified via an archive. + if (usingArchive) { + // Add PWD/tezlib/* + classpathBuilder.append(Environment.PWD.$()) + .append(File.separator) + .append(TezConstants.TEZ_TAR_LR_NAME) + .append(File.separator) + .append("*") + .append(File.pathSeparator); + // Legacy: Add PWD/tezlib/lib/* + classpathBuilder.append(Environment.PWD.$()) + .append(File.separator) + .append(TezConstants.TEZ_TAR_LR_NAME) + .append(File.separator) + .append("lib") + .append(File.separator) + .append("*") + .append(File.pathSeparator); + } + } // Last add HADOOP_CLASSPATH, if it's required. 
if (conf.getBoolean(TezConfiguration.TEZ_USE_CLUSTER_HADOOP_LIBS, TezConfiguration.TEZ_USE_CLUSTER_HADOOP_LIBS_DEFAULT)) { http://git-wip-us.apache.org/repos/asf/tez/blob/60b1f6e6/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java ---------------------------------------------------------------------- diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java b/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java index 96adff3..6bb531b 100644 --- a/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java +++ b/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java @@ -944,19 +944,19 @@ public class TezConfiguration extends Configuration { * The location of the Tez libraries which will be localized for DAGs. * This follows the following semantics * <ol> - * <li> To use a single .tar.gz or .tgz file (generated by the tez build), the full path to this + * <li> To use .tar.gz or .tgz files (generated by the tez or hadoop builds), the full path to this * file (including filename) should be specified. The internal structure of the uncompressed tgz - * will be retained under $CWD/tezlib.</li> + * will be defined by 'tez.lib.uris.classpath'</li> * * <li> If a single file is specified without the above mentioned extensions - it will be treated as * a regular file. This means it will not be uncompressed during runtime. </li> * * <li> If multiple entries exist * <ul> - * <li> Files: will be treated as regular files (not uncompressed during runtime) </li> + * <li> Regular Files: will be treated as regular files (not uncompressed during runtime) </li> + * <li> Archive Files: will be treated as archives and will be uncompressed during runtime </li> * <li> Directories: all files under the directory (non-recursive) will be made available (but not * uncompressed during runtime). 
</li> - * <li> All files / contents of directories are flattened into a single directory - $CWD </li> * </ul> * </ol> */ @@ -964,6 +964,18 @@ public class TezConfiguration extends Configuration { public static final String TEZ_LIB_URIS = TEZ_PREFIX + "lib.uris"; /** + * + * Specify additional user classpath information to be used for Tez AM and all containers. + * This will be appended to the classpath after PWD + * + * 'tez.lib.uris.classpath' defines the relative classpath into the archives + * that are set in 'tez.lib.uris' + * + */ + @ConfigurationScope(Scope.AM) + public static final String TEZ_LIB_URIS_CLASSPATH = TEZ_PREFIX + "lib.uris.classpath"; + + /** * Auxiliary resources to be localized for the Tez AM and all its containers. * * Value is comma-separated list of fully-resolved directories or file paths. All resources http://git-wip-us.apache.org/repos/asf/tez/blob/60b1f6e6/tez-api/src/test/java/org/apache/tez/client/TestTezClientUtils.java ---------------------------------------------------------------------- diff --git a/tez-api/src/test/java/org/apache/tez/client/TestTezClientUtils.java b/tez-api/src/test/java/org/apache/tez/client/TestTezClientUtils.java index 0800085..70a693a 100644 --- a/tez-api/src/test/java/org/apache/tez/client/TestTezClientUtils.java +++ b/tez-api/src/test/java/org/apache/tez/client/TestTezClientUtils.java @@ -206,6 +206,91 @@ public class TestTezClientUtils { assertFalse(localizedMap.isEmpty()); } + /** + * + */ + @Test (timeout=5000) + public void validateSetTezJarLocalResourcesMultipleTarballs() throws Exception { + FileSystem localFs = FileSystem.getLocal(new Configuration()); + StringBuilder tezLibUris = new StringBuilder(); + + // Create 2 files + Path topDir = new Path(TEST_ROOT_DIR, "validatemultipletarballs"); + if (localFs.exists(topDir)) { + localFs.delete(topDir, true); + } + localFs.mkdirs(topDir); + + Path tarFile1 = new Path(topDir, "f1.tar.gz"); + Path tarFile2 = new Path(topDir, "f2.tar.gz"); + + 
Assert.assertTrue(localFs.createNewFile(tarFile1)); + Assert.assertTrue(localFs.createNewFile(tarFile2)); + tezLibUris.append(localFs.makeQualified(tarFile1).toString()).append("#tar1").append(","); + tezLibUris.append(localFs.makeQualified(tarFile2).toString()).append("#tar2").append(","); + + TezConfiguration conf = new TezConfiguration(); + conf.set(TezConfiguration.TEZ_LIB_URIS, tezLibUris.toString()); + Credentials credentials = new Credentials(); + Map<String, LocalResource> localizedMap = new HashMap<String, LocalResource>(); + TezClientUtils.setupTezJarsLocalResources(conf, credentials, localizedMap); + Set<String> resourceNames = localizedMap.keySet(); + Assert.assertEquals(2, resourceNames.size()); + Assert.assertTrue(resourceNames.contains("tar1")); + Assert.assertTrue(resourceNames.contains("tar2")); + Assert.assertFalse(resourceNames.contains("f1.tar.gz")); + Assert.assertFalse(resourceNames.contains("f2.tar.gz")); + + + Assert.assertTrue(localFs.delete(tarFile1, true)); + Assert.assertTrue(localFs.delete(tarFile2, true)); + Assert.assertTrue(localFs.delete(topDir, true)); + } + + /** + * + */ + @Test (timeout=5000) + public void validateSetTezJarLocalResourcesMixTarballAndJar() throws Exception { + FileSystem localFs = FileSystem.getLocal(new Configuration()); + StringBuilder tezLibUris = new StringBuilder(); + + // Create 2 jars and 1 archive + Path topDir = new Path(TEST_ROOT_DIR, "validatetarballandjar"); + if (localFs.exists(topDir)) { + localFs.delete(topDir, true); + } + localFs.mkdirs(topDir); + + Path tarFile1 = new Path(topDir, "f1.tar.gz"); + Path jarFile2 = new Path(topDir, "f2.jar"); + Path jarFile3 = new Path(topDir, "f3.jar"); + + Assert.assertTrue(localFs.createNewFile(tarFile1)); + Assert.assertTrue(localFs.createNewFile(jarFile2)); + Assert.assertTrue(localFs.createNewFile(jarFile3)); + + tezLibUris.append(localFs.makeQualified(topDir).toString()).append(","); + 
tezLibUris.append(localFs.makeQualified(tarFile1).toString()).append("#tar1").append(","); + + TezConfiguration conf = new TezConfiguration(); + conf.set(TezConfiguration.TEZ_LIB_URIS, tezLibUris.toString()); + Credentials credentials = new Credentials(); + Map<String, LocalResource> localizedMap = new HashMap<String, LocalResource>(); + TezClientUtils.setupTezJarsLocalResources(conf, credentials, localizedMap); + Set<String> resourceNames = localizedMap.keySet(); + Assert.assertEquals(4, resourceNames.size()); + Assert.assertTrue(resourceNames.contains("tar1")); + Assert.assertTrue(resourceNames.contains("f1.tar.gz")); + Assert.assertTrue(resourceNames.contains("f2.jar")); + Assert.assertTrue(resourceNames.contains("f3.jar")); + + Assert.assertTrue(localFs.delete(tarFile1, true)); + Assert.assertTrue(localFs.delete(jarFile2, true)); + Assert.assertTrue(localFs.delete(jarFile3, true)); + Assert.assertTrue(localFs.delete(topDir, true)); + } + @Test(timeout = 5000) public void testSessionTokenInAmClc() throws IOException, YarnException { http://git-wip-us.apache.org/repos/asf/tez/blob/60b1f6e6/tez-api/src/test/java/org/apache/tez/common/TestTezYARNUtils.java ---------------------------------------------------------------------- diff --git a/tez-api/src/test/java/org/apache/tez/common/TestTezYARNUtils.java b/tez-api/src/test/java/org/apache/tez/common/TestTezYARNUtils.java index 6e9e06c..2dabf51 100644 --- a/tez-api/src/test/java/org/apache/tez/common/TestTezYARNUtils.java +++ b/tez-api/src/test/java/org/apache/tez/common/TestTezYARNUtils.java @@ -80,4 +80,15 @@ public class TestTezYARNUtils { Assert.assertEquals("User env should append default env", Environment.PWD.$() + File.pathSeparator + "USER_PATH" + File.pathSeparator + "DEFAULT_PATH", value3); } + + @Test(timeout = 5000) + public void testTezLibUrisClasspath() { + Configuration conf = new Configuration(false); + conf.set(TezConfiguration.TEZ_LIB_URIS_CLASSPATH, "foobar"); + String classpath = 
TezYARNUtils.getFrameworkClasspath(conf, true); + Assert.assertTrue(classpath.contains("foobar")); + Assert.assertTrue(classpath.contains(Environment.PWD.$())); + Assert.assertTrue(classpath.indexOf("foobar") > + classpath.indexOf(Environment.PWD.$())); + } }
