Author: cdouglas
Date: Tue Jan 26 07:54:40 2010
New Revision: 903117
URL: http://svn.apache.org/viewvc?rev=903117&view=rev
Log:
MAPREDUCE-433. Use more reliable counters in TestReduceFetch.
Modified:
hadoop/common/branches/branch-0.20/CHANGES.txt
hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/ReduceTask.java
hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/mapred/TestReduceFetch.java
Modified: hadoop/common/branches/branch-0.20/CHANGES.txt
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20/CHANGES.txt?rev=903117&r1=903116&r2=903117&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20/CHANGES.txt (original)
+++ hadoop/common/branches/branch-0.20/CHANGES.txt Tue Jan 26 07:54:40 2010
@@ -87,6 +87,8 @@
HDFS-187. Initialize secondary namenode http address in TestStartup.
(Todd Lipcon via szetszwo)
+ MAPREDUCE-433. Use more reliable counters in TestReduceFetch. (cdouglas)
+
HDFS-792. DFSClient 0.20.1 is incompatible with HDFS 0.20.2.
(Tod Lipcon via hairong)
Modified:
hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/ReduceTask.java
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/ReduceTask.java?rev=903117&r1=903116&r2=903117&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/ReduceTask.java (original)
+++ hadoop/common/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/ReduceTask.java Tue Jan 26 07:54:40 2010
@@ -987,9 +987,10 @@
throw new IOException("mapred.job.shuffle.input.buffer.percent" +
maxInMemCopyUse);
}
- maxSize = (long)Math.min(
- Runtime.getRuntime().maxMemory() * maxInMemCopyUse,
- Integer.MAX_VALUE);
+ // Allow unit tests to fix Runtime memory
+ maxSize = (int)(conf.getInt("mapred.job.reduce.total.mem.bytes",
+ (int)Math.min(Runtime.getRuntime().maxMemory(), Integer.MAX_VALUE))
+ * maxInMemCopyUse);
maxSingleShuffleLimit = (long)(maxSize *
MAX_SINGLE_SHUFFLE_SEGMENT_FRACTION);
LOG.info("ShuffleRamManager: MemoryLimit=" + maxSize +
", MaxSingleShuffleLimit=" + maxSingleShuffleLimit);
Modified:
hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/mapred/TestReduceFetch.java
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/mapred/TestReduceFetch.java?rev=903117&r1=903116&r2=903117&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/mapred/TestReduceFetch.java (original)
+++ hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/mapred/TestReduceFetch.java Tue Jan 26 07:54:40 2010
@@ -101,48 +101,53 @@
}
public void testReduceFromDisk() throws Exception {
+ final int MAP_TASKS = 8;
JobConf job = mrCluster.createJobConf();
job.set("mapred.job.reduce.input.buffer.percent", "0.0");
- job.setNumMapTasks(3);
+ job.setNumMapTasks(MAP_TASKS);
+ job.setInt("mapred.job.reduce.total.mem.bytes", 128 << 20);
+ job.set("mapred.job.shuffle.input.buffer.percent", "0.05");
+ job.setInt("io.sort.factor", 2);
+ job.setInt("mapred.inmem.merge.threshold", 4);
Counters c = runJob(job);
- final long hdfsWritten = c.findCounter(Task.FILESYSTEM_COUNTER_GROUP,
- Task.getFileSystemCounterNames("hdfs")[1]).getCounter();
- final long localRead = c.findCounter(Task.FILESYSTEM_COUNTER_GROUP,
- Task.getFileSystemCounterNames("file")[0]).getCounter();
- assertTrue("Expected more bytes read from local (" +
- localRead + ") than written to HDFS (" + hdfsWritten + ")",
- hdfsWritten <= localRead);
+ final long spill = c.findCounter(Task.Counter.SPILLED_RECORDS).getCounter();
+ final long out = c.findCounter(Task.Counter.MAP_OUTPUT_RECORDS).getCounter();
+ assertTrue("Expected all records spilled during reduce (" + spill + ")",
+ spill >= 2 * out); // all records spill at map, reduce
+ assertTrue("Expected intermediate merges (" + spill + ")",
+ spill >= 2 * out + (out / MAP_TASKS)); // some records hit twice
}
public void testReduceFromPartialMem() throws Exception {
+ final int MAP_TASKS = 7;
JobConf job = mrCluster.createJobConf();
- job.setNumMapTasks(5);
+ job.setNumMapTasks(MAP_TASKS);
job.setInt("mapred.inmem.merge.threshold", 0);
job.set("mapred.job.reduce.input.buffer.percent", "1.0");
job.setInt("mapred.reduce.parallel.copies", 1);
job.setInt("io.sort.mb", 10);
- job.set("mapred.child.java.opts", "-Xmx128m");
+ job.setInt("mapred.job.reduce.total.mem.bytes", 128 << 20);
job.set("mapred.job.shuffle.input.buffer.percent", "0.14");
job.setNumTasksToExecutePerJvm(1);
job.set("mapred.job.shuffle.merge.percent", "1.0");
Counters c = runJob(job);
- final long hdfsWritten = c.findCounter(Task.FILESYSTEM_COUNTER_GROUP,
- Task.getFileSystemCounterNames("hdfs")[1]).getCounter();
- final long localRead = c.findCounter(Task.FILESYSTEM_COUNTER_GROUP,
- Task.getFileSystemCounterNames("file")[0]).getCounter();
- assertTrue("Expected at least 1MB fewer bytes read from local (" +
- localRead + ") than written to HDFS (" + hdfsWritten + ")",
- hdfsWritten >= localRead + 1024 * 1024);
+ final long out = c.findCounter(Task.Counter.MAP_OUTPUT_RECORDS).getCounter();
+ final long spill = c.findCounter(Task.Counter.SPILLED_RECORDS).getCounter();
+ assertTrue("Expected some records not spilled during reduce" + spill + ")",
+ spill < 2 * out); // spilled map records, some records at the reduce
}
public void testReduceFromMem() throws Exception {
+ final int MAP_TASKS = 3;
JobConf job = mrCluster.createJobConf();
job.set("mapred.job.reduce.input.buffer.percent", "1.0");
- job.setNumMapTasks(3);
+ job.set("mapred.job.shuffle.input.buffer.percent", "1.0");
+ job.setInt("mapred.job.reduce.total.mem.bytes", 128 << 20);
+ job.setNumMapTasks(MAP_TASKS);
Counters c = runJob(job);
- final long localRead = c.findCounter(Task.FILESYSTEM_COUNTER_GROUP,
- Task.getFileSystemCounterNames("file")[0]).getCounter();
- assertTrue("Non-zero read from local: " + localRead, localRead == 0);
+ final long spill = c.findCounter(Task.Counter.SPILLED_RECORDS).getCounter();
+ final long out = c.findCounter(Task.Counter.MAP_OUTPUT_RECORDS).getCounter();
+ assertEquals("Spilled records: " + spill, out, spill); // no reduce spill
}
}