Author: cdouglas
Date: Thu Jul 10 15:15:55 2008
New Revision: 675784
URL: http://svn.apache.org/viewvc?rev=675784&view=rev
Log:
HADOOP-3684. Add a cloning function to the contrib/data_join framework
permitting users to define a more efficient method for cloning values from
the reduce than serialization/deserialization. Contributed by Runping Qi.
Modified:
hadoop/core/trunk/CHANGES.txt
hadoop/core/trunk/src/contrib/data_join/src/java/org/apache/hadoop/contrib/utils/join/DataJoinReducerBase.java
hadoop/core/trunk/src/contrib/data_join/src/java/org/apache/hadoop/contrib/utils/join/TaggedMapOutput.java
Modified: hadoop/core/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/hadoop/core/trunk/CHANGES.txt?rev=675784&r1=675783&r2=675784&view=diff
==============================================================================
--- hadoop/core/trunk/CHANGES.txt (original)
+++ hadoop/core/trunk/CHANGES.txt Thu Jul 10 15:15:55 2008
@@ -69,6 +69,10 @@
HADOOP-3660. Add replication factor for injecting blocks in simulated
datanodes. (Sanjay Radia via cdouglas)
+ HADOOP-3684. Add a cloning function to the contrib/data_join framework
+ permitting users to define a more efficient method for cloning values from
+ the reduce than serialization/deserialization. (Runping Qi via cdouglas)
+
OPTIMIZATIONS
HADOOP-3556. Removed lock contention in MD5Hash by changing the
Modified:
hadoop/core/trunk/src/contrib/data_join/src/java/org/apache/hadoop/contrib/utils/join/DataJoinReducerBase.java
URL:
http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/data_join/src/java/org/apache/hadoop/contrib/utils/join/DataJoinReducerBase.java?rev=675784&r1=675783&r2=675784&view=diff
==============================================================================
---
hadoop/core/trunk/src/contrib/data_join/src/java/org/apache/hadoop/contrib/utils/join/DataJoinReducerBase.java
(original)
+++
hadoop/core/trunk/src/contrib/data_join/src/java/org/apache/hadoop/contrib/utils/join/DataJoinReducerBase.java
Thu Jul 10 15:15:55 2008
@@ -95,7 +95,6 @@
SortedMap<Object, ResetableIterator> retv = new TreeMap<Object,
ResetableIterator>();
TaggedMapOutput aRecord = null;
while (arg1.hasNext()) {
- aRecord = (TaggedMapOutput) arg1.next();
this.numOfValues += 1;
if (this.numOfValues % 100 == 0) {
reporter.setStatus("key: " + key.toString() + " numOfValues: "
@@ -104,13 +103,14 @@
if (this.numOfValues > this.maxNumOfValuesPerGroup) {
continue;
}
- Text tag = new Text((Text)aRecord.getTag());
+ aRecord = ((TaggedMapOutput) arg1.next()).clone(job);
+ Text tag = aRecord.getTag();
ResetableIterator data = retv.get(tag);
if (data == null) {
data = createResetableIterator();
retv.put(tag, data);
}
- data.add(WritableUtils.clone(aRecord, job));
+ data.add(aRecord);
}
if (this.numOfValues > this.largestNumOfValues) {
this.largestNumOfValues = numOfValues;
Modified:
hadoop/core/trunk/src/contrib/data_join/src/java/org/apache/hadoop/contrib/utils/join/TaggedMapOutput.java
URL:
http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/data_join/src/java/org/apache/hadoop/contrib/utils/join/TaggedMapOutput.java?rev=675784&r1=675783&r2=675784&view=diff
==============================================================================
---
hadoop/core/trunk/src/contrib/data_join/src/java/org/apache/hadoop/contrib/utils/join/TaggedMapOutput.java
(original)
+++
hadoop/core/trunk/src/contrib/data_join/src/java/org/apache/hadoop/contrib/utils/join/TaggedMapOutput.java
Thu Jul 10 15:15:55 2008
@@ -20,6 +20,8 @@
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableUtils;
+import org.apache.hadoop.mapred.JobConf;
/**
* This abstract class serves as the base class for the values that
@@ -46,5 +48,9 @@
}
public abstract Writable getData();
+
+ public TaggedMapOutput clone(JobConf job) {
+ return (TaggedMapOutput) WritableUtils.clone(this, job);
+ }
}