Author: jlowe Date: Tue Oct 1 22:40:01 2013 New Revision: 1528242 URL: http://svn.apache.org/r1528242 Log: svn merge -c 1528237 FIXES: MAPREDUCE-4421. Run MapReduce framework via the distributed cache. Contributed by Jason Lowe
Added: hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/apt/DistributedCacheDeploy.apt.vm - copied unchanged from r1528237, hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/apt/DistributedCacheDeploy.apt.vm Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/util/TestMRApps.java hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobSubmitter.java hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt?rev=1528242&r1=1528241&r2=1528242&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt (original) +++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt Tue Oct 1 22:40:01 2013 @@ -25,6 +25,8 @@ Release 2.3.0 - UNRELEASED MAPREDUCE-434. LocalJobRunner limited to single reducer (Sandy Ryza and Aaron Kimball via Sandy Ryza) + MAPREDUCE-4421. Run MapReduce framework via the distributed cache (jlowe) + OPTIMIZATIONS MAPREDUCE-5484. 
YarnChild unnecessarily loads job conf twice (Sandy Ryza) Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java?rev=1528242&r1=1528241&r2=1528242&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java (original) +++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java Tue Oct 1 22:40:01 2013 @@ -21,6 +21,7 @@ package org.apache.hadoop.mapreduce.v2.u import java.io.IOException; import java.net.MalformedURLException; import java.net.URI; +import java.net.URISyntaxException; import java.security.AccessController; import java.security.PrivilegedActionException; import java.security.PrivilegedExceptionAction; @@ -133,6 +134,30 @@ public class MRApps extends Apps { return TaskAttemptStateUI.valueOf(attemptStateStr); } + // gets the base name of the MapReduce framework or null if no + // framework was configured + private static String getMRFrameworkName(Configuration conf) { + String frameworkName = null; + String framework = + conf.get(MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH, ""); + if (!framework.isEmpty()) { + URI uri; + try { + uri = new URI(framework); + } catch (URISyntaxException e) { + throw new IllegalArgumentException("Unable to parse '" + framework + + "' as a URI, check the setting for " + + MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH, e); + } + + frameworkName = uri.getFragment(); + if 
(frameworkName == null) { + frameworkName = new Path(uri).getName(); + } + } + return frameworkName; + } + private static void setMRFrameworkClasspath( Map<String, String> environment, Configuration conf) throws IOException { // Propagate the system classpath when using the mini cluster @@ -141,18 +166,33 @@ public class MRApps extends Apps { System.getProperty("java.class.path")); } - // Add standard Hadoop classes - for (String c : conf.getStrings( - YarnConfiguration.YARN_APPLICATION_CLASSPATH, - YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH)) { - Apps.addToEnvironment(environment, Environment.CLASSPATH.name(), c - .trim()); + // if the framework is specified then only use the MR classpath + String frameworkName = getMRFrameworkName(conf); + if (frameworkName == null) { + // Add standard Hadoop classes + for (String c : conf.getStrings( + YarnConfiguration.YARN_APPLICATION_CLASSPATH, + YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH)) { + Apps.addToEnvironment(environment, Environment.CLASSPATH.name(), c + .trim()); + } } + + boolean foundFrameworkInClasspath = (frameworkName == null); for (String c : conf.getStrings( MRJobConfig.MAPREDUCE_APPLICATION_CLASSPATH, MRJobConfig.DEFAULT_MAPREDUCE_APPLICATION_CLASSPATH)) { Apps.addToEnvironment(environment, Environment.CLASSPATH.name(), c .trim()); + if (!foundFrameworkInClasspath) { + foundFrameworkInClasspath = c.contains(frameworkName); + } + } + + if (!foundFrameworkInClasspath) { + throw new IllegalArgumentException( + "Could not locate MapReduce framework name '" + frameworkName + + "' in " + MRJobConfig.MAPREDUCE_APPLICATION_CLASSPATH); } // TODO: Remove duplicates. 
} Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/util/TestMRApps.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/util/TestMRApps.java?rev=1528242&r1=1528241&r2=1528242&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/util/TestMRApps.java (original) +++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/util/TestMRApps.java Tue Oct 1 22:40:01 2013 @@ -283,7 +283,46 @@ public class TestMRApps { assertEquals("MAPREDUCE_JOB_CLASSLOADER true, but job.jar is not in the app" + " classpath!", expectedAppClasspath, appCp); } - + + @Test (timeout = 3000000) + public void testSetClasspathWithFramework() throws IOException { + final String FRAMEWORK_NAME = "some-framework-name"; + final String FRAMEWORK_PATH = "some-framework-path#" + FRAMEWORK_NAME; + Configuration conf = new Configuration(); + conf.set(MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH, FRAMEWORK_PATH); + Map<String, String> env = new HashMap<String, String>(); + try { + MRApps.setClasspath(env, conf); + fail("Failed to catch framework path set without classpath change"); + } catch (IllegalArgumentException e) { + assertTrue("Unexpected IllegalArgumentException", + e.getMessage().contains("Could not locate MapReduce framework name '" + + FRAMEWORK_NAME + "'")); + } + + env.clear(); + final String FRAMEWORK_CLASSPATH = FRAMEWORK_NAME + "/*.jar"; + conf.set(MRJobConfig.MAPREDUCE_APPLICATION_CLASSPATH, FRAMEWORK_CLASSPATH); + 
MRApps.setClasspath(env, conf); + final String stdClasspath = StringUtils.join(File.pathSeparator, + Arrays.asList("job.jar/job.jar", "job.jar/classes/", "job.jar/lib/*", + ApplicationConstants.Environment.PWD.$() + "/*")); + String expectedClasspath = StringUtils.join(File.pathSeparator, + Arrays.asList(ApplicationConstants.Environment.PWD.$(), + FRAMEWORK_CLASSPATH, stdClasspath)); + assertEquals("Incorrect classpath with framework and no user precedence", + expectedClasspath, env.get("CLASSPATH")); + + env.clear(); + conf.setBoolean(MRJobConfig.MAPREDUCE_JOB_USER_CLASSPATH_FIRST, true); + MRApps.setClasspath(env, conf); + expectedClasspath = StringUtils.join(File.pathSeparator, + Arrays.asList(ApplicationConstants.Environment.PWD.$(), + stdClasspath, FRAMEWORK_CLASSPATH)); + assertEquals("Incorrect classpath with framework and user precedence", + expectedClasspath, env.get("CLASSPATH")); + } + @Test (timeout = 30000) public void testSetupDistributedCacheEmpty() throws IOException { Configuration conf = new Configuration(); Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobSubmitter.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobSubmitter.java?rev=1528242&r1=1528241&r2=1528242&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobSubmitter.java (original) +++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobSubmitter.java Tue Oct 1 22:40:01 2013 @@ -39,6 +39,7 @@ import org.apache.hadoop.classification. 
import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileContext; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.Path; @@ -340,11 +341,12 @@ class JobSubmitter { //validate the jobs output specs checkSpecs(job); - - Path jobStagingArea = JobSubmissionFiles.getStagingDir(cluster, - job.getConfiguration()); - //configure the command line options correctly on the submitting dfs + Configuration conf = job.getConfiguration(); + addMRFrameworkToDistributedCache(conf); + + Path jobStagingArea = JobSubmissionFiles.getStagingDir(cluster, conf); + //configure the command line options correctly on the submitting dfs InetAddress ip = InetAddress.getLocalHost(); if (ip != null) { submitHostAddress = ip.getHostAddress(); @@ -602,7 +604,6 @@ class JobSubmitter { } //get secret keys and tokens and store them into TokenCache - @SuppressWarnings("unchecked") private void populateTokenCache(Configuration conf, Credentials credentials) throws IOException{ readTokensFromFiles(conf, credentials); @@ -618,4 +619,41 @@ class JobSubmitter { TokenCache.obtainTokensForNamenodes(credentials, ps, conf); } } + + @SuppressWarnings("deprecation") + private static void addMRFrameworkToDistributedCache(Configuration conf) + throws IOException { + String framework = + conf.get(MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH, ""); + if (!framework.isEmpty()) { + URI uri; + try { + uri = new URI(framework); + } catch (URISyntaxException e) { + throw new IllegalArgumentException("Unable to parse '" + framework + + "' as a URI, check the setting for " + + MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH, e); + } + + String linkedName = uri.getFragment(); + + // resolve any symlinks in the URI path so using a "current" symlink + // to point to a specific version shows the specific version + // in the distributed cache 
configuration + FileSystem fs = FileSystem.get(conf); + Path frameworkPath = fs.makeQualified( + new Path(uri.getScheme(), uri.getAuthority(), uri.getPath())); + FileContext fc = FileContext.getFileContext(frameworkPath.toUri(), conf); + frameworkPath = fc.resolvePath(frameworkPath); + uri = frameworkPath.toUri(); + try { + uri = new URI(uri.getScheme(), uri.getAuthority(), uri.getPath(), + null, linkedName); + } catch (URISyntaxException e) { + throw new IllegalArgumentException(e); + } + + DistributedCache.addCacheArchive(uri, conf); + } + } } Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java?rev=1528242&r1=1528241&r2=1528242&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java (original) +++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java Tue Oct 1 22:40:01 2013 @@ -645,6 +645,12 @@ public interface MRJobConfig { "mapreduce.application.classpath"; /** + * Path to MapReduce framework archive + */ + public static final String MAPREDUCE_APPLICATION_FRAMEWORK_PATH = + "mapreduce.application.framework.path"; + + /** * Default CLASSPATH for all YARN MapReduce applications. 
*/ public static final String[] DEFAULT_MAPREDUCE_APPLICATION_CLASSPATH = { Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml?rev=1528242&r1=1528241&r2=1528242&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml (original) +++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml Tue Oct 1 22:40:01 2013 @@ -1458,12 +1458,32 @@ <property> <description>CLASSPATH for MR applications. A comma-separated list - of CLASSPATH entries</description> + of CLASSPATH entries. If mapreduce.application.framework.path is set then this + must specify the appropriate classpath for that archive, and the name of + the archive must be present in the classpath.</description> <name>mapreduce.application.classpath</name> <value>$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*,$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*</value> </property> <property> + <description>Path to the MapReduce framework archive. If set, the framework + archive will automatically be distributed along with the job, and this + path would normally reside in a public location in an HDFS filesystem. As + with distributed cache files, this can be a URL with a fragment specifying + the alias to use for the archive name. For example, + hdfs:/mapred/framework/hadoop-mapreduce-2.1.1.tar.gz#mrframework would + alias the localized archive as "mrframework". 
+ + Note that mapreduce.application.classpath must include the appropriate + classpath for the specified framework. The base name of the archive, or + alias of the archive if an alias is used, must appear in the specified + classpath. + </description> + <name>mapreduce.application.framework.path</name> + <value></value> +</property> + +<property> <name>mapreduce.job.classloader</name> <value>false</value> <description>Whether to use a separate (isolated) classloader for