Author: vinodkv Date: Tue Mar 11 01:30:20 2014 New Revision: 1576171 URL: http://svn.apache.org/r1576171 Log: MAPREDUCE-5028. Fixed a bug in MapTask that was causing mappers to fail when a large value of io.sort.mb is set. Contributed by Karthik Kambatla. svn merge --ignore-ancestry -c 1576170 ../../trunk/
Added: hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/LargeSorter.java - copied unchanged from r1576170, hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/LargeSorter.java hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/TestLargeSort.java - copied unchanged from r1576170, hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/TestLargeSort.java Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/MapTask.java hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/ReduceContextImpl.java hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/InMemoryReader.java hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/test/MapredTestDriver.java Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt?rev=1576171&r1=1576170&r2=1576171&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt (original) +++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt Tue Mar 11 01:30:20 2014 @@ -69,6 +69,9 @@ Release 2.4.0 - UNRELEASED MAPREDUCE-5780. SliveTest should use the specified path to get the particular FileSystem instead of using the default FileSystem. (szetszwo) + MAPREDUCE-5028. Fixed a bug in MapTask that was causing mappers to fail + when a large value of io.sort.mb is set. (Karthik Kambatla via vinodkv) + Release 2.3.1 - UNRELEASED INCOMPATIBLE CHANGES Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/MapTask.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/MapTask.java?rev=1576171&r1=1576170&r2=1576171&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/MapTask.java (original) +++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/MapTask.java Tue Mar 11 01:30:20 2014 @@ -1176,8 +1176,9 @@ public class MapTask extends Task { equator = pos; // set index prior to first entry, aligned at meta boundary final int aligned = pos - (pos % METASIZE); - kvindex = - ((aligned - METASIZE + kvbuffer.length) % kvbuffer.length) / 4; + // Cast one of the operands to long to avoid integer overflow + kvindex = (int) + (((long)aligned - METASIZE + kvbuffer.length) % kvbuffer.length) / 4; LOG.info("(EQUATOR) " + pos + " kvi " + kvindex + "(" + (kvindex * 4) + ")"); } @@ -1192,8 +1193,9 @@ public class MapTask extends Task { bufstart = bufend = e; final int aligned = e - (e % METASIZE); // set start/end to point to first meta record - kvstart = kvend = - ((aligned - METASIZE + kvbuffer.length) % kvbuffer.length) / 4; + // Cast one of the operands to long to avoid integer overflow + kvstart = kvend = (int) + (((long)aligned - METASIZE + kvbuffer.length) % kvbuffer.length) / 4; LOG.info("(RESET) equator " + e + " kv " + kvstart + "(" + (kvstart * 4) + ")" + " kvi " + kvindex + "(" + (kvindex * 4) + ")"); } Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/ReduceContextImpl.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/ReduceContextImpl.java?rev=1576171&r1=1576170&r2=1576171&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/ReduceContextImpl.java (original) +++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/ReduceContextImpl.java Tue Mar 11 01:30:20 2014 @@ -141,7 +141,8 @@ public class ReduceContextImpl<KEYIN,VAL buffer.reset(currentRawKey.getBytes(), 0, currentRawKey.getLength()); key = keyDeserializer.deserialize(key); DataInputBuffer nextVal = input.getValue(); - buffer.reset(nextVal.getData(), nextVal.getPosition(), nextVal.getLength()); + buffer.reset(nextVal.getData(), nextVal.getPosition(), nextVal.getLength() + - nextVal.getPosition()); value = valueDeserializer.deserialize(value); currentKeyLength = nextKey.getLength() - nextKey.getPosition(); @@ -205,7 +206,8 @@ public class ReduceContextImpl<KEYIN,VAL if (backupStore.hasNext()) { backupStore.next(); DataInputBuffer next = backupStore.nextValue(); - buffer.reset(next.getData(), next.getPosition(), next.getLength()); + buffer.reset(next.getData(), next.getPosition(), next.getLength() + - next.getPosition()); value = valueDeserializer.deserialize(value); return value; } else { Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/InMemoryReader.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/InMemoryReader.java?rev=1576171&r1=1576170&r2=1576171&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/InMemoryReader.java (original) +++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/InMemoryReader.java Tue Mar 11 01:30:20 2014 @@ -37,9 +37,9 @@ import org.apache.hadoop.mapreduce.TaskA public class InMemoryReader<K, V> extends Reader<K, V> { private final TaskAttemptID taskAttemptId; private final MergeManagerImpl<K,V> merger; - DataInputBuffer memDataIn = new DataInputBuffer(); - private int start; - private int length; + private final DataInputBuffer memDataIn = new DataInputBuffer(); + private final int start; + private final int length; public InMemoryReader(MergeManagerImpl<K,V> merger, TaskAttemptID taskAttemptId, byte[] data, int start, int length, Configuration conf) @@ -50,14 +50,14 @@ public class InMemoryReader<K, V> extend buffer = data; bufferSize = (int)fileLength; - memDataIn.reset(buffer, start, length); + memDataIn.reset(buffer, start, length - start); this.start = start; this.length = length; } @Override public void reset(int offset) { - memDataIn.reset(buffer, start + offset, length); + memDataIn.reset(buffer, start + offset, length - start - offset); bytesRead = offset; eof = false; } Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/test/MapredTestDriver.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/test/MapredTestDriver.java?rev=1576171&r1=1576170&r2=1576171&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/test/MapredTestDriver.java (original) +++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/test/MapredTestDriver.java Tue Mar 11 01:30:20 2014 @@ -29,6 +29,7 @@ import org.apache.hadoop.mapred.TestSequ import org.apache.hadoop.mapred.TestTextInputFormat; import org.apache.hadoop.mapred.ThreadedMapBenchmark; import org.apache.hadoop.mapreduce.FailJob; +import org.apache.hadoop.mapreduce.LargeSorter; import org.apache.hadoop.mapreduce.MiniHadoopClusterManager; import org.apache.hadoop.mapreduce.SleepJob; import org.apache.hadoop.util.ProgramDriver; @@ -104,6 +105,8 @@ public class MapredTestDriver { "HDFS Stress Test and Live Data Verification."); pgd.addClass("minicluster", MiniHadoopClusterManager.class, "Single process HDFS and MR cluster."); + pgd.addClass("largesorter", LargeSorter.class, + "Large-Sort tester"); } catch(Throwable e) { e.printStackTrace(); }