Author: acmurthy
Date: Mon Aug 1 21:49:39 2011
New Revision: 1152944
URL: http://svn.apache.org/viewvc?rev=1152944&view=rev
Log:
MAPREDUCE-2187. Reporter sends progress during sort/merge. Contributed by
Anupam Seth.
Added:
hadoop/common/branches/branch-0.20-security/src/test/org/apache/hadoop/mapred/TestCombineOutputCollector.java
Modified:
hadoop/common/branches/branch-0.20-security/CHANGES.txt
hadoop/common/branches/branch-0.20-security/src/mapred/mapred-default.xml
hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/MapTask.java
hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/ReduceTask.java
hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/Task.java
Modified: hadoop/common/branches/branch-0.20-security/CHANGES.txt
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/CHANGES.txt?rev=1152944&r1=1152943&r2=1152944&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security/CHANGES.txt (original)
+++ hadoop/common/branches/branch-0.20-security/CHANGES.txt Mon Aug 1 21:49:39 2011
@@ -180,6 +180,9 @@ Release 0.20.204.0 - unreleased
IMPROVEMENTS
+ MAPREDUCE-2187. Reporter sends progress during sort/merge. (Anupam Seth via
+ acmurthy)
+
HADOOP-7144. Expose JMX metrics via JSON servlet. (Robert Joseph Evans via
cdouglas)
Modified:
hadoop/common/branches/branch-0.20-security/src/mapred/mapred-default.xml
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/src/mapred/mapred-default.xml?rev=1152944&r1=1152943&r2=1152944&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security/src/mapred/mapred-default.xml (original)
+++ hadoop/common/branches/branch-0.20-security/src/mapred/mapred-default.xml Mon Aug 1 21:49:39 2011
@@ -1132,6 +1132,14 @@
</property>
<property>
+ <name>mapred.combine.recordsBeforeProgress</name>
+ <value>10000</value>
+ <description> The number of records to process during combine output collection
+ before sending a progress notification to the TaskTracker.
+ </description>
+</property>
+
+<property>
<name>mapred.merge.recordsBeforeProgress</name>
<value>10000</value>
<description> The number of records to process during merge before
Modified:
hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/MapTask.java
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/MapTask.java?rev=1152944&r1=1152943&r2=1152944&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/MapTask.java (original)
+++ hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/MapTask.java Mon Aug 1 21:49:39 2011
@@ -982,7 +982,7 @@ class MapTask extends Task {
combineInputCounter,
reporter, null);
if (combinerRunner != null) {
- combineCollector= new
CombineOutputCollector<K,V>(combineOutputCounter);
+ combineCollector= new
CombineOutputCollector<K,V>(combineOutputCounter, reporter, conf);
} else {
combineCollector = null;
}
Modified:
hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/ReduceTask.java
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/ReduceTask.java?rev=1152944&r1=1152943&r2=1152944&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/ReduceTask.java (original)
+++ hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/ReduceTask.java Mon Aug 1 21:49:39 2011
@@ -1928,7 +1928,7 @@ class ReduceTask extends Task {
reporter, null);
if (combinerRunner != null) {
combineCollector =
- new CombineOutputCollector(reduceCombineOutputCounter);
+ new CombineOutputCollector(reduceCombineOutputCounter, reporter,
conf);
}
this.ioSortFactor = conf.getInt("io.sort.factor", 10);
Modified:
hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/Task.java
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/Task.java?rev=1152944&r1=1152943&r2=1152944&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/Task.java (original)
+++ hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/mapred/Task.java Mon Aug 1 21:49:39 2011
@@ -65,6 +65,8 @@ import org.apache.hadoop.fs.FSDataInputS
abstract public class Task implements Writable, Configurable {
private static final Log LOG =
LogFactory.getLog(Task.class);
+ public static final String MR_COMBINE_RECORDS_BEFORE_PROGRESS =
"mapred.combine.recordsBeforeProgress";
+ public static final long DEFAULT_MR_COMBINE_RECORDS_BEFORE_PROGRESS = 10000;
// Counters used by Task subclasses
public static enum Counter {
@@ -1083,16 +1085,26 @@ abstract public class Task implements Wr
implements OutputCollector<K, V> {
private Writer<K, V> writer;
private Counters.Counter outCounter;
- public CombineOutputCollector(Counters.Counter outCounter) {
+ private Progressable progressable;
+ private long progressBar;
+
+ public CombineOutputCollector(Counters.Counter outCounter, Progressable
progressable, Configuration conf) {
this.outCounter = outCounter;
+ this.progressable=progressable;
+ progressBar = conf.getLong(MR_COMBINE_RECORDS_BEFORE_PROGRESS,
DEFAULT_MR_COMBINE_RECORDS_BEFORE_PROGRESS);
}
+
public synchronized void setWriter(Writer<K, V> writer) {
this.writer = writer;
}
+
public synchronized void collect(K key, V value)
throws IOException {
outCounter.increment(1);
writer.append(key, value);
+ if ((outCounter.getValue() % progressBar) == 0) {
+ progressable.progress();
+ }
}
}
Added:
hadoop/common/branches/branch-0.20-security/src/test/org/apache/hadoop/mapred/TestCombineOutputCollector.java
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/src/test/org/apache/hadoop/mapred/TestCombineOutputCollector.java?rev=1152944&view=auto
==============================================================================
--- hadoop/common/branches/branch-0.20-security/src/test/org/apache/hadoop/mapred/TestCombineOutputCollector.java (added)
+++ hadoop/common/branches/branch-0.20-security/src/test/org/apache/hadoop/mapred/TestCombineOutputCollector.java Mon Aug 1 21:49:39 2011
@@ -0,0 +1,77 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapred;
+
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.never;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapred.IFile.Writer;
+import org.apache.hadoop.mapred.Task.CombineOutputCollector;
+import org.apache.hadoop.mapred.Task.TaskReporter;
+import org.junit.Test;
+
+public class TestCombineOutputCollector {
+ private CombineOutputCollector<String, Integer> coc;
+
+ @Test
+ public void testCustomCollect() throws Throwable {
+ //mock creation
+ TaskReporter mockTaskReporter = mock(TaskReporter.class);
+ Counters.Counter outCounter = new Counters.Counter();
+ Writer<String, Integer> mockWriter = mock(Writer.class);
+
+ Configuration conf = new Configuration();
+ conf.set("mapred.combine.recordsBeforeProgress", "2");
+
+ coc = new CombineOutputCollector<String, Integer>(outCounter,
mockTaskReporter, conf);
+ coc.setWriter(mockWriter);
+ verify(mockTaskReporter, never()).progress();
+
+ coc.collect("dummy", 1);
+ verify(mockTaskReporter, never()).progress();
+
+ coc.collect("dummy", 2);
+ verify(mockTaskReporter, times(1)).progress();
+ }
+
+ @Test
+ public void testDefaultCollect() throws Throwable {
+ //mock creation
+ TaskReporter mockTaskReporter = mock(TaskReporter.class);
+ Counters.Counter outCounter = new Counters.Counter();
+ Writer<String, Integer> mockWriter = mock(Writer.class);
+
+ Configuration conf = new Configuration();
+
+ coc = new CombineOutputCollector<String, Integer>(outCounter,
mockTaskReporter, conf);
+ coc.setWriter(mockWriter);
+ verify(mockTaskReporter, never()).progress();
+
+ for(int i = 0; i < Task.DEFAULT_MR_COMBINE_RECORDS_BEFORE_PROGRESS; i++) {
+ coc.collect("dummy", i);
+ }
+ verify(mockTaskReporter, times(1)).progress();
+ for(int i = 0; i < Task.DEFAULT_MR_COMBINE_RECORDS_BEFORE_PROGRESS; i++) {
+ coc.collect("dummy", i);
+ }
+ verify(mockTaskReporter, times(2)).progress();
+ }
+}