Author: arp Date: Sat Aug 23 07:49:41 2014 New Revision: 1619980 URL: http://svn.apache.org/r1619980 Log: Merging r1619458 through r1619979 from trunk to branch HDFS-6581
Modified: hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/ (props changed) hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/CHANGES.txt (contents, props changed) hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/jobhistory/TestJobHistoryEventHandler.java hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JobHistoryUtils.java hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/util/TestMRApps.java hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobConf.java hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/MapTask.java hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/ConfigUtil.java hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml (contents, props changed) hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/apt/PluggableShuffleAndPluggableSort.apt.vm hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestJobConf.java hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/conf/TestJobConf.java hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobs.java Propchange: hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/ ------------------------------------------------------------------------------ Merged /hadoop/common/trunk/hadoop-mapreduce-project:r1619458-1619979 Modified: hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/CHANGES.txt?rev=1619980&r1=1619979&r2=1619980&view=diff ============================================================================== --- hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/CHANGES.txt (original) +++ hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/CHANGES.txt Sat Aug 23 07:49:41 2014 @@ -187,6 +187,12 @@ Release 2.6.0 - UNRELEASED MAPREDUCE-5906. Inconsistent configuration in property "mapreduce.reduce.shuffle.input.buffer.percent" (Akira AJISAKA via aw) + MAPREDUCE-5974. Allow specifying multiple MapOutputCollectors with + fallback. (Todd Lipcon via kasha) + + MAPREDUCE-5130. Add missing job config options to mapred-default.xml + (Ray Chiang via Sandy Ryza) + OPTIMIZATIONS BUG FIXES @@ -249,6 +255,9 @@ Release 2.6.0 - UNRELEASED MAPREDUCE-6012. DBInputSplit creates invalid ranges on Oracle. (Wei Yan via kasha) + MAPREDUCE-6044. Fully qualified intermediate done dir path breaks per-user dir + creation on Windows. (zjshen) + Release 2.5.0 - 2014-08-11 INCOMPATIBLE CHANGES Propchange: hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/CHANGES.txt ------------------------------------------------------------------------------ Merged /hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt:r1619458-1619979 Modified: hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/jobhistory/TestJobHistoryEventHandler.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/jobhistory/TestJobHistoryEventHandler.java?rev=1619980&r1=1619979&r2=1619980&view=diff ============================================================================== --- hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/jobhistory/TestJobHistoryEventHandler.java (original) +++ hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/jobhistory/TestJobHistoryEventHandler.java Sat Aug 23 07:49:41 2014 @@ -25,7 +25,6 @@ import static org.mockito.Mockito.spy; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; -import static org.mockito.Mockito.never; import java.io.File; import java.io.FileOutputStream; @@ -53,6 +52,8 @@ import org.apache.hadoop.mapreduce.v2.ap import org.apache.hadoop.mapreduce.v2.app.AppContext; import org.apache.hadoop.mapreduce.v2.app.job.Job; import org.apache.hadoop.mapreduce.v2.app.job.JobStateInternal; +import org.apache.hadoop.mapreduce.v2.jobhistory.JHAdminConfig; +import org.apache.hadoop.mapreduce.v2.jobhistory.JobHistoryUtils; import org.apache.hadoop.mapreduce.v2.util.MRBuilderUtils; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; @@ -399,6 +400,33 @@ public class TestJobHistoryEventHandler } } + @Test + public void testGetHistoryIntermediateDoneDirForUser() throws IOException { + // Test relative path + Configuration conf = new Configuration(); + conf.set(JHAdminConfig.MR_HISTORY_INTERMEDIATE_DONE_DIR, + "/mapred/history/done_intermediate"); + conf.set(MRJobConfig.USER_NAME, System.getProperty("user.name")); + String pathStr = JobHistoryUtils.getHistoryIntermediateDoneDirForUser(conf); + Assert.assertEquals("/mapred/history/done_intermediate/" + + System.getProperty("user.name"), pathStr); + + // Test fully qualified path + // Create default configuration pointing to the minicluster + conf.set(CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY, + dfsCluster.getURI().toString()); + FileOutputStream os = new FileOutputStream(coreSitePath); + conf.writeXml(os); + os.close(); + // Simulate execution under a non-default namenode + conf.set(CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY, + "file:///"); + pathStr = JobHistoryUtils.getHistoryIntermediateDoneDirForUser(conf); + Assert.assertEquals(dfsCluster.getURI().toString() + + "/mapred/history/done_intermediate/" + System.getProperty("user.name"), + pathStr); + } + private void queueEvent(JHEvenHandlerForTest jheh, JobHistoryEvent event) { jheh.handle(event); } Modified: hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JobHistoryUtils.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JobHistoryUtils.java?rev=1619980&r1=1619979&r2=1619980&view=diff ============================================================================== --- hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JobHistoryUtils.java (original) +++ hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JobHistoryUtils.java Sat Aug 23 07:49:41 2014 @@ -292,8 +292,8 @@ public class JobHistoryUtils { * @return the intermediate done directory for jobhistory files. */ public static String getHistoryIntermediateDoneDirForUser(Configuration conf) throws IOException { - return getConfiguredHistoryIntermediateDoneDirPrefix(conf) + File.separator - + UserGroupInformation.getCurrentUser().getShortUserName(); + return new Path(getConfiguredHistoryIntermediateDoneDirPrefix(conf), + UserGroupInformation.getCurrentUser().getShortUserName()).toString(); } public static boolean shouldCreateNonUserDirectory(Configuration conf) { Modified: hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java?rev=1619980&r1=1619979&r2=1619980&view=diff ============================================================================== --- hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java (original) +++ hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java Sat Aug 23 07:49:41 2014 @@ -34,6 +34,7 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; import com.google.common.annotations.VisibleForTesting; + import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience.Private; @@ -56,6 +57,7 @@ import org.apache.hadoop.mapreduce.v2.ap import org.apache.hadoop.mapreduce.v2.api.records.TaskId; import org.apache.hadoop.mapreduce.v2.api.records.TaskState; import org.apache.hadoop.mapreduce.v2.api.records.TaskType; +import org.apache.hadoop.util.ApplicationClassLoader; import org.apache.hadoop.util.Shell; import org.apache.hadoop.util.StringInterner; import org.apache.hadoop.util.StringUtils; @@ -67,7 +69,6 @@ import org.apache.hadoop.yarn.api.record import org.apache.hadoop.yarn.api.records.LocalResourceVisibility; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; -import org.apache.hadoop.yarn.util.ApplicationClassLoader; import org.apache.hadoop.yarn.util.Apps; import org.apache.hadoop.yarn.util.ConverterUtils; import org.apache.log4j.RollingFileAppender; Modified: hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/util/TestMRApps.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/util/TestMRApps.java?rev=1619980&r1=1619979&r2=1619980&view=diff ============================================================================== --- hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/util/TestMRApps.java (original) +++ hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/util/TestMRApps.java Sat Aug 23 07:49:41 2014 @@ -51,6 +51,7 @@ import org.apache.hadoop.mapreduce.v2.ap import org.apache.hadoop.mapreduce.v2.api.records.TaskId; import org.apache.hadoop.mapreduce.v2.api.records.TaskState; import org.apache.hadoop.mapreduce.v2.api.records.TaskType; +import org.apache.hadoop.util.ApplicationClassLoader; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.yarn.api.ApplicationConstants; import org.apache.hadoop.yarn.api.records.ApplicationId; @@ -58,7 +59,6 @@ import org.apache.hadoop.yarn.api.record import org.apache.hadoop.yarn.api.records.LocalResourceType; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; -import org.apache.hadoop.yarn.util.ApplicationClassLoader; import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; @@ -514,7 +514,8 @@ public class TestMRApps { @Test public void testSystemClasses() { final List<String> systemClasses = - Arrays.asList(MRApps.getSystemClasses(new Configuration())); + Arrays.asList(StringUtils.getTrimmedStrings( + ApplicationClassLoader.DEFAULT_SYSTEM_CLASSES)); for (String defaultXml : DEFAULT_XMLS) { assertTrue(defaultXml + " must be system resource", ApplicationClassLoader.isSystemClass(defaultXml, systemClasses)); Modified: hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobConf.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobConf.java?rev=1619980&r1=1619979&r2=1619980&view=diff ============================================================================== --- hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobConf.java (original) +++ hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobConf.java Sat Aug 23 07:49:41 2014 @@ -151,7 +151,9 @@ public class JobConf extends Configurati /** * A value which if set for memory related configuration options, * indicates that the options are turned off. + * Deprecated because it makes no sense in the context of MR2. */ + @Deprecated public static final long DISABLED_MEMORY_LIMIT = -1L; /** @@ -1809,27 +1811,19 @@ public class JobConf extends Configurati * Get memory required to run a map task of the job, in MB. * * If a value is specified in the configuration, it is returned. - * Else, it returns {@link #DISABLED_MEMORY_LIMIT}. + * Else, it returns {@link JobContext#DEFAULT_MAP_MEMORY_MB}. * <p/> * For backward compatibility, if the job configuration sets the * key {@link #MAPRED_TASK_MAXVMEM_PROPERTY} to a value different * from {@link #DISABLED_MEMORY_LIMIT}, that value will be used * after converting it from bytes to MB. * @return memory required to run a map task of the job, in MB, - * or {@link #DISABLED_MEMORY_LIMIT} if unset. */ public long getMemoryForMapTask() { long value = getDeprecatedMemoryValue(); - if (value == DISABLED_MEMORY_LIMIT) { - value = normalizeMemoryConfigValue( - getLong(JobConf.MAPREDUCE_JOB_MAP_MEMORY_MB_PROPERTY, - DISABLED_MEMORY_LIMIT)); - } - // In case that M/R 1.x applications use the old property name - if (value == DISABLED_MEMORY_LIMIT) { - value = normalizeMemoryConfigValue( - getLong(JobConf.MAPRED_JOB_MAP_MEMORY_MB_PROPERTY, - DISABLED_MEMORY_LIMIT)); + if (value < 0) { + return getLong(JobConf.MAPRED_JOB_MAP_MEMORY_MB_PROPERTY, + JobContext.DEFAULT_MAP_MEMORY_MB); } return value; } @@ -1844,27 +1838,19 @@ public class JobConf extends Configurati * Get memory required to run a reduce task of the job, in MB. * * If a value is specified in the configuration, it is returned. - * Else, it returns {@link #DISABLED_MEMORY_LIMIT}. + * Else, it returns {@link JobContext#DEFAULT_REDUCE_MEMORY_MB}. * <p/> * For backward compatibility, if the job configuration sets the * key {@link #MAPRED_TASK_MAXVMEM_PROPERTY} to a value different * from {@link #DISABLED_MEMORY_LIMIT}, that value will be used * after converting it from bytes to MB. - * @return memory required to run a reduce task of the job, in MB, - * or {@link #DISABLED_MEMORY_LIMIT} if unset. + * @return memory required to run a reduce task of the job, in MB. */ public long getMemoryForReduceTask() { long value = getDeprecatedMemoryValue(); - if (value == DISABLED_MEMORY_LIMIT) { - value = normalizeMemoryConfigValue( - getLong(JobConf.MAPREDUCE_JOB_REDUCE_MEMORY_MB_PROPERTY, - DISABLED_MEMORY_LIMIT)); - } - // In case that M/R 1.x applications use the old property name - if (value == DISABLED_MEMORY_LIMIT) { - value = normalizeMemoryConfigValue( - getLong(JobConf.MAPRED_JOB_REDUCE_MEMORY_MB_PROPERTY, - DISABLED_MEMORY_LIMIT)); + if (value < 0) { + return getLong(JobConf.MAPRED_JOB_REDUCE_MEMORY_MB_PROPERTY, + JobContext.DEFAULT_REDUCE_MEMORY_MB); } return value; } @@ -1876,8 +1862,7 @@ public class JobConf extends Configurati private long getDeprecatedMemoryValue() { long oldValue = getLong(MAPRED_TASK_MAXVMEM_PROPERTY, DISABLED_MEMORY_LIMIT); - oldValue = normalizeMemoryConfigValue(oldValue); - if (oldValue != DISABLED_MEMORY_LIMIT) { + if (oldValue > 0) { oldValue /= (1024*1024); } return oldValue; @@ -1921,39 +1906,6 @@ public class JobConf extends Configurati return val; } - /** - * Compute the number of slots required to run a single map task-attempt - * of this job. - * @param slotSizePerMap cluster-wide value of the amount of memory required - * to run a map-task - * @return the number of slots required to run a single map task-attempt - * 1 if memory parameters are disabled. - */ - int computeNumSlotsPerMap(long slotSizePerMap) { - if ((slotSizePerMap==DISABLED_MEMORY_LIMIT) || - (getMemoryForMapTask()==DISABLED_MEMORY_LIMIT)) { - return 1; - } - return (int)(Math.ceil((float)getMemoryForMapTask() / (float)slotSizePerMap)); - } - - /** - * Compute the number of slots required to run a single reduce task-attempt - * of this job. - * @param slotSizePerReduce cluster-wide value of the amount of memory - * required to run a reduce-task - * @return the number of slots required to run a single reduce task-attempt - * 1 if memory parameters are disabled - */ - int computeNumSlotsPerReduce(long slotSizePerReduce) { - if ((slotSizePerReduce==DISABLED_MEMORY_LIMIT) || - (getMemoryForReduceTask()==DISABLED_MEMORY_LIMIT)) { - return 1; - } - return - (int)(Math.ceil((float)getMemoryForReduceTask() / (float)slotSizePerReduce)); - } - /** * Find a jar that contains a class of the same name, if any. * It will return a jar file, even if that is not the first thing @@ -1975,14 +1927,12 @@ public class JobConf extends Configurati * set for map and reduce tasks of a job, in MB. * <p/> * For backward compatibility, if the job configuration sets the - * key {@link #MAPRED_TASK_MAXVMEM_PROPERTY} to a value different - * from {@link #DISABLED_MEMORY_LIMIT}, that value is returned. + * key {@link #MAPRED_TASK_MAXVMEM_PROPERTY}, that value is returned. * Otherwise, this method will return the larger of the values returned by * {@link #getMemoryForMapTask()} and {@link #getMemoryForReduceTask()} * after converting them into bytes. * - * @return Memory required to run a task of this job, in bytes, - * or {@link #DISABLED_MEMORY_LIMIT}, if unset. + * @return Memory required to run a task of this job, in bytes. * @see #setMaxVirtualMemoryForTask(long) * @deprecated Use {@link #getMemoryForMapTask()} and * {@link #getMemoryForReduceTask()} @@ -1993,15 +1943,8 @@ public class JobConf extends Configurati "getMaxVirtualMemoryForTask() is deprecated. " + "Instead use getMemoryForMapTask() and getMemoryForReduceTask()"); - long value = getLong(MAPRED_TASK_MAXVMEM_PROPERTY, DISABLED_MEMORY_LIMIT); - value = normalizeMemoryConfigValue(value); - if (value == DISABLED_MEMORY_LIMIT) { - value = Math.max(getMemoryForMapTask(), getMemoryForReduceTask()); - value = normalizeMemoryConfigValue(value); - if (value != DISABLED_MEMORY_LIMIT) { - value *= 1024*1024; - } - } + long value = getLong(MAPRED_TASK_MAXVMEM_PROPERTY, + Math.max(getMemoryForMapTask(), getMemoryForReduceTask()) * 1024 * 1024); return value; } @@ -2027,9 +1970,8 @@ public class JobConf extends Configurati public void setMaxVirtualMemoryForTask(long vmem) { LOG.warn("setMaxVirtualMemoryForTask() is deprecated."+ "Instead use setMemoryForMapTask() and setMemoryForReduceTask()"); - if(vmem != DISABLED_MEMORY_LIMIT && vmem < 0) { - setMemoryForMapTask(DISABLED_MEMORY_LIMIT); - setMemoryForReduceTask(DISABLED_MEMORY_LIMIT); + if (vmem < 0) { + throw new IllegalArgumentException("Task memory allocation may not be < 0"); } if(get(JobConf.MAPRED_TASK_MAXVMEM_PROPERTY) == null) { Modified: hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/MapTask.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/MapTask.java?rev=1619980&r1=1619979&r2=1619980&view=diff ============================================================================== --- hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/MapTask.java (original) +++ hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/MapTask.java Sat Aug 23 07:49:41 2014 @@ -381,16 +381,35 @@ public class MapTask extends Task { private <KEY, VALUE> MapOutputCollector<KEY, VALUE> createSortingCollector(JobConf job, TaskReporter reporter) throws IOException, ClassNotFoundException { - MapOutputCollector<KEY, VALUE> collector - = (MapOutputCollector<KEY, VALUE>) - ReflectionUtils.newInstance( - job.getClass(JobContext.MAP_OUTPUT_COLLECTOR_CLASS_ATTR, - MapOutputBuffer.class, MapOutputCollector.class), job); - LOG.info("Map output collector class = " + collector.getClass().getName()); MapOutputCollector.Context context = - new MapOutputCollector.Context(this, job, reporter); - collector.init(context); - return collector; + new MapOutputCollector.Context(this, job, reporter); + + Class<?>[] collectorClasses = job.getClasses( + JobContext.MAP_OUTPUT_COLLECTOR_CLASS_ATTR, MapOutputBuffer.class); + int remainingCollectors = collectorClasses.length; + for (Class clazz : collectorClasses) { + try { + if (!MapOutputCollector.class.isAssignableFrom(clazz)) { + throw new IOException("Invalid output collector class: " + clazz.getName() + + " (does not implement MapOutputCollector)"); + } + Class<? extends MapOutputCollector> subclazz = + clazz.asSubclass(MapOutputCollector.class); + LOG.debug("Trying map output collector class: " + subclazz.getName()); + MapOutputCollector<KEY, VALUE> collector = + ReflectionUtils.newInstance(subclazz, job); + collector.init(context); + LOG.info("Map output collector class = " + collector.getClass().getName()); + return collector; + } catch (Exception e) { + String msg = "Unable to initialize MapOutputCollector " + clazz.getName(); + if (--remainingCollectors > 0) { + msg += " (" + remainingCollectors + " more collector(s) to try)"; + } + LOG.warn(msg, e); + } + } + throw new IOException("Unable to initialize any output collector"); } @SuppressWarnings("unchecked") Modified: hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/ConfigUtil.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/ConfigUtil.java?rev=1619980&r1=1619979&r2=1619980&view=diff ============================================================================== --- hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/ConfigUtil.java (original) +++ hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/ConfigUtil.java Sat Aug 23 07:49:41 2014 @@ -278,6 +278,8 @@ public class ConfigUtil { MRJobConfig.TASK_DEBUGOUT_LINES), new DeprecationDelta("mapred.merge.recordsBeforeProgress", MRJobConfig.RECORDS_BEFORE_PROGRESS), + new DeprecationDelta("mapred.merge.recordsBeforeProgress", + MRJobConfig.COMBINE_RECORDS_BEFORE_PROGRESS), new DeprecationDelta("mapred.skip.attempts.to.start.skipping", MRJobConfig.SKIP_START_ATTEMPTS), new DeprecationDelta("mapred.task.id", Modified: hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml?rev=1619980&r1=1619979&r2=1619980&view=diff ============================================================================== --- hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml (original) +++ hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml Sat Aug 23 07:49:41 2014 @@ -185,11 +185,42 @@ </description> </property> +<property> + <name>mapreduce.map.memory.mb</name> + <value>1024</value> + <description>The amount of memory to request from the scheduler for each + map task. + </description> +</property> + +<property> + <name>mapreduce.map.cpu.vcores</name> + <value>1</value> + <description>The number of virtual cores to request from the scheduler for + each map task. + </description> +</property> + +<property> + <name>mapreduce.reduce.memory.mb</name> + <value>1024</value> + <description>The amount of memory to request from the scheduler for each + reduce task. + </description> +</property> + +<property> + <name>mapreduce.reduce.cpu.vcores</name> + <value>1</value> + <description>The number of virtual cores to request from the scheduler for + each reduce task. + </description> +</property> <property> <name>mapred.child.java.opts</name> <value>-Xmx200m</value> - <description>Java opts for the task tracker child processes. + <description>Java opts for the task processes. The following symbol, if present, will be interpolated: @taskid@ is replaced by current TaskID. Any other occurrences of '@' will go unchanged. For example, to enable verbose gc logging to a file named for the taskid in @@ -203,17 +234,55 @@ </description> </property> +<!-- This is commented out so that it won't override mapred.child.java.opts. +<property> + <name>mapreduce.map.java.opts</name> + <value></value> + <description>Java opts only for the child processes that are maps. If set, + this will be used instead of mapred.child.java.opts. + </description> +</property> +--> + +<!-- This is commented out so that it won't override mapred.child.java.opts. +<property> + <name>mapreduce.reduce.java.opts</name> + <value></value> + <description>Java opts only for the child processes that are reduces. If set, + this will be used instead of mapred.child.java.opts. + </description> +</property> +--> + <property> <name>mapred.child.env</name> <value></value> - <description>User added environment variables for the task tracker child - processes. Example : + <description>User added environment variables for the task processes. + Example : 1) A=foo This will set the env variable A to foo 2) B=$B:c This is inherit nodemanager's B env variable on Unix. 3) B=%B%;c This is inherit nodemanager's B env variable on Windows. </description> </property> +<!-- This is commented out so that it won't override mapred.child.env. +<property> + <name>mapreduce.map.env</name> + <value></value> + <description>User added environment variables for the map task processes. + </description> +</property> +--> + +<!-- This is commented out so that it won't override mapred.child.env. +<property> + <name>mapreduce.reduce.env</name> + <value></value> + <description>User added environment variables for the reduce task processes. + </description> +</property> +--> + <property> <name>mapreduce.admin.user.env</name> <value></value> @@ -408,7 +477,9 @@ <name>mapreduce.job.map.output.collector.class</name> <value>org.apache.hadoop.mapred.MapTask$MapOutputBuffer</value> <description> - It defines the MapOutputCollector implementation to use. + The MapOutputCollector implementation(s) to use. This may be a comma-separated + list of class names, in which case the map task will try to initialize each + of the collectors in turn. The first to successfully initialize will be used. </description> </property> @@ -488,6 +559,12 @@ </description> </property> +<property> + <name>mapreduce.input.lineinputformat.linespermap</name> + <value>1</value> + <description>When using NLineInputFormat, the number of lines of input data + to include in each split.</description> +</property> <property> @@ -922,6 +999,14 @@ </property> <property> + <name>mapreduce.task.combine.progress.records</name> + <value>10000</value> + <description> The number of records to process during combine output collection + before sending a progress notification. + </description> +</property> + +<property> <name>mapreduce.job.reduce.slowstart.completedmaps</name> <value>0.05</value> <description>Fraction of the number of maps in the job which should be @@ -1225,13 +1310,13 @@ <property> <name>mapreduce.job.classloader.system.classes</name> - <value>java.,javax.,org.w3c.dom.,org.xml.sax.,org.apache.commons.logging., - org.apache.log4j.,org.apache.hadoop.,core-default.xml, - hdfs-default.xml,mapred-default.xml,yarn-default.xml</value> - <description>A comma-separated list of classes that should be loaded from the - system classpath, not the user-supplied JARs, when mapreduce.job.classloader - is enabled. Names ending in '.' (period) are treated as package names, - and names starting with a '-' are treated as negative matches. + <value></value> + <description>Used to override the default definition of the system classes for + the job classloader. The system classes are a comma-separated list of + classes that should be loaded from the system classpath, not the + user-supplied JARs, when mapreduce.job.classloader is enabled. Names ending + in '.' (period) are treated as package names, and names starting with a '-' + are treated as negative matches. </description> </property> Propchange: hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml ------------------------------------------------------------------------------ Merged /hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml:r1619458-1619979 Modified: hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/apt/PluggableShuffleAndPluggableSort.apt.vm URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/apt/PluggableShuffleAndPluggableSort.apt.vm?rev=1619980&r1=1619979&r2=1619980&view=diff ============================================================================== --- hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/apt/PluggableShuffleAndPluggableSort.apt.vm (original) +++ hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/apt/PluggableShuffleAndPluggableSort.apt.vm Sat Aug 23 07:49:41 2014 @@ -71,11 +71,16 @@ Hadoop MapReduce Next Generation - Plugg *--------------------------------------+---------------------+-----------------+ | <<<mapreduce.job.reduce.shuffle.consumer.plugin.class>>> | <<<org.apache.hadoop.mapreduce.task.reduce.Shuffle>>> | The <<<ShuffleConsumerPlugin>>> implementation to use | *--------------------------------------+---------------------+-----------------+ -| <<<mapreduce.job.map.output.collector.class>>> | <<<org.apache.hadoop.mapred.MapTask$MapOutputBuffer>>> | The <<<MapOutputCollector>>> implementation to use | +| <<<mapreduce.job.map.output.collector.class>>> | <<<org.apache.hadoop.mapred.MapTask$MapOutputBuffer>>> | The <<<MapOutputCollector>>> implementation(s) to use | *--------------------------------------+---------------------+-----------------+ These properties can also be set in the <<<mapred-site.xml>>> to change the default values for all jobs. + The collector class configuration may specify a comma-separated list of collector implementations. + In this case, the map task will attempt to instantiate each in turn until one of the + implementations successfully initializes. This can be useful if a given collector + implementation is only compatible with certain types of keys or values, for example. + ** NodeManager Configuration properties, <<<yarn-site.xml>>> in all nodes: *--------------------------------------+---------------------+-----------------+ @@ -91,4 +96,3 @@ Hadoop MapReduce Next Generation - Plugg <<<yarn.nodemanager.aux-services>>> property, for example <<<mapred.shufflex>>>. Then the property defining the corresponding class must be <<<yarn.nodemanager.aux-services.mapreduce_shufflex.class>>>. - \ No newline at end of file Modified: hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestJobConf.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestJobConf.java?rev=1619980&r1=1619979&r2=1619980&view=diff ============================================================================== --- hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestJobConf.java (original) +++ hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestJobConf.java Sat Aug 23 07:49:41 2014 @@ -140,18 +140,21 @@ public class TestJobConf { conf.setQueueName("qname"); assertEquals("qname", conf.getQueueName()); - assertEquals(1, conf.computeNumSlotsPerMap(100L)); - assertEquals(1, conf.computeNumSlotsPerReduce(100L)); - conf.setMemoryForMapTask(100 * 1000); - assertEquals(1000, conf.computeNumSlotsPerMap(100L)); + assertEquals(100 * 1000, conf.getMemoryForMapTask()); conf.setMemoryForReduceTask(1000 * 1000); - assertEquals(1000, conf.computeNumSlotsPerReduce(1000L)); + assertEquals(1000 * 1000, conf.getMemoryForReduceTask()); assertEquals(-1, conf.getMaxPhysicalMemoryForTask()); assertEquals("The variable key is no longer used.", JobConf.deprecatedString("key")); - + + // make sure mapreduce.map|reduce.java.opts are not set by default + // so that they won't override mapred.child.java.opts + assertEquals("mapreduce.map.java.opts should not be set by default", + null, conf.get(JobConf.MAPRED_MAP_TASK_JAVA_OPTS)); + assertEquals("mapreduce.reduce.java.opts should not be set by default", + null, conf.get(JobConf.MAPRED_REDUCE_TASK_JAVA_OPTS)); } /** Modified: hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/conf/TestJobConf.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/conf/TestJobConf.java?rev=1619980&r1=1619979&r2=1619980&view=diff ============================================================================== --- hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/conf/TestJobConf.java (original) +++ hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/conf/TestJobConf.java Sat Aug 23 07:49:41 2014 @@ -108,6 +108,11 @@ public class TestJobConf { JobConf configuration = new JobConf(); configuration.set(JobConf.MAPRED_TASK_MAXVMEM_PROPERTY, "-3"); + Assert.assertEquals(MRJobConfig.DEFAULT_MAP_MEMORY_MB, + configuration.getMemoryForMapTask()); + Assert.assertEquals(MRJobConfig.DEFAULT_REDUCE_MEMORY_MB, + configuration.getMemoryForReduceTask()); + configuration.set(MRJobConfig.MAP_MEMORY_MB, "4"); configuration.set(MRJobConfig.REDUCE_MEMORY_MB, "5"); Assert.assertEquals(4, configuration.getMemoryForMapTask()); @@ -116,23 +121,16 @@ public class TestJobConf { } /** - * Test that negative values for all memory configuration properties causes - * APIs to disable memory limits + * Test that negative values for new configuration keys get passed through. */ @Test public void testNegativeValuesForMemoryParams() { JobConf configuration = new JobConf(); - - configuration.set(JobConf.MAPRED_TASK_MAXVMEM_PROPERTY, "-4"); + configuration.set(MRJobConfig.MAP_MEMORY_MB, "-5"); configuration.set(MRJobConfig.REDUCE_MEMORY_MB, "-6"); - - Assert.assertEquals(JobConf.DISABLED_MEMORY_LIMIT, - configuration.getMemoryForMapTask()); - Assert.assertEquals(JobConf.DISABLED_MEMORY_LIMIT, - configuration.getMemoryForReduceTask()); - Assert.assertEquals(JobConf.DISABLED_MEMORY_LIMIT, - configuration.getMaxVirtualMemoryForTask()); + Assert.assertEquals(-5, configuration.getMemoryForMapTask()); + Assert.assertEquals(-6, configuration.getMemoryForReduceTask()); } /** Modified: hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobs.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobs.java?rev=1619980&r1=1619979&r2=1619980&view=diff ============================================================================== --- hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobs.java (original) +++ hadoop/common/branches/HDFS-6581/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobs.java Sat Aug 23 07:49:41 2014 @@ -84,13 +84,13 @@ import org.apache.hadoop.mapreduce.v2.ap import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.TokenIdentifier; +import org.apache.hadoop.util.ApplicationClassLoader; import org.apache.hadoop.util.JarFinder; import org.apache.hadoop.util.Shell; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState; -import org.apache.hadoop.yarn.util.ApplicationClassLoader; import org.apache.hadoop.yarn.util.ConverterUtils; import org.apache.log4j.Level; import org.junit.AfterClass; @@ -242,8 +242,7 @@ public class TestMRJobs { // to test AM loading user classes such as output format class, we want // to blacklist them from the system classes (they need to be prepended // as the first match wins) - String systemClasses = - sleepConf.get(MRJobConfig.MAPREDUCE_JOB_CLASSLOADER_SYSTEM_CLASSES); + String systemClasses = ApplicationClassLoader.DEFAULT_SYSTEM_CLASSES; // exclude the custom classes from system classes systemClasses = "-" + CustomOutputFormat.class.getName() + ",-" + CustomSpeculator.class.getName() + "," +