Author: omalley
Date: Wed Dec 26 23:02:12 2007
New Revision: 607010

URL: http://svn.apache.org/viewvc?rev=607010&view=rev
Log:
HADOOP-2425. Special case TextOutputFormat to specifically handle Text and NullWritable.
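For context, here is a minimal sketch (not part of this commit; the class name is hypothetical) of the usage this change enables: a reducer that emits NullWritable keys, so that TextOutputFormat writes value-only lines with no key and no tab separator.

  import java.io.IOException;
  import java.util.Iterator;

  import org.apache.hadoop.io.NullWritable;
  import org.apache.hadoop.io.Text;
  import org.apache.hadoop.mapred.MapReduceBase;
  import org.apache.hadoop.mapred.OutputCollector;
  import org.apache.hadoop.mapred.Reducer;
  import org.apache.hadoop.mapred.Reporter;

  // Hypothetical reducer relying on the new behavior: NullWritable keys
  // are now treated like null, so each output line is just the value.
  public class ValueOnlyReducer extends MapReduceBase
      implements Reducer<Text, Text, NullWritable, Text> {
    public void reduce(Text key, Iterator<Text> values,
                       OutputCollector<NullWritable, Text> output,
                       Reporter reporter) throws IOException {
      while (values.hasNext()) {
        output.collect(NullWritable.get(), values.next());
      }
    }
  }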
Modified:
    lucene/hadoop/trunk/CHANGES.txt
    lucene/hadoop/trunk/src/java/org/apache/hadoop/io/NullWritable.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TextOutputFormat.java
    lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/TestTextOutputFormat.java

Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?rev=607010&r1=607009&r2=607010&view=diff
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Wed Dec 26 23:02:12 2007
@@ -140,6 +140,10 @@
     non-static method to allow sub-classes to provide alternate
     implementations. (Alejandro Abdelnur via acmurthy)
 
+    HADOOP-2425. Change TextOutputFormat to handle Text specifically for better
+    performance. Make NullWritable implement Comparable. Make TextOutputFormat
+    treat NullWritable like null. (omalley)
+
   OPTIMIZATIONS
 
     HADOOP-1898. Release the lock protecting the last time of the last stack

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/io/NullWritable.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/io/NullWritable.java?rev=607010&r1=607009&r2=607010&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/io/NullWritable.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/io/NullWritable.java Wed Dec 26 23:02:12 2007
@@ -21,7 +21,7 @@
 import java.io.*;
 
 /** Singleton Writable with no data. */
-public class NullWritable implements Writable {
+public class NullWritable implements WritableComparable {
 
   private static final NullWritable THIS = new NullWritable();
 
@@ -34,6 +34,15 @@
     return "(null)";
   }
 
+  public int hashCode() { return 0; }
+  public int compareTo(Object other) {
+    if (!(other instanceof NullWritable)) {
+      throw new ClassCastException("can't compare " + other.getClass().getName() +
+                                   " to NullWritable");
+    }
+    return 0;
+  }
+
   public boolean equals(Object other) { return other instanceof NullWritable; }
   public void readFields(DataInput in) throws IOException {}
   public void write(DataOutput out) throws IOException {}
 }
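Implementing WritableComparable (rather than just Writable) is what lets NullWritable be declared as an output key type: LineRecordWriter bounds its key parameter by WritableComparable, and map output keys must be comparable so the framework can sort them. A sketch of a job setup that depends on this, assuming a hypothetical driver class MyJob:

  // Hypothetical configuration; valid only now that NullWritable is a
  // WritableComparable and can serve as an output key class.
  JobConf job = new JobConf(MyJob.class);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(Text.class);
  job.setOutputFormat(TextOutputFormat.class);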
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TextOutputFormat.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TextOutputFormat.java?rev=607010&r1=607009&r2=607010&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TextOutputFormat.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TextOutputFormat.java Wed Dec 26 23:02:12 2007
@@ -20,11 +20,14 @@
 
 import java.io.DataOutputStream;
 import java.io.IOException;
+import java.io.UnsupportedEncodingException;
 
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.compress.CompressionCodec;
@@ -39,6 +42,17 @@
   protected static class LineRecordWriter<K extends WritableComparable,
                                           V extends Writable>
     implements RecordWriter<K, V> {
+    private static final String utf8 = "UTF-8";
+    private static final byte[] tab;
+    private static final byte[] newline;
+    static {
+      try {
+        tab = "\t".getBytes(utf8);
+        newline = "\n".getBytes(utf8);
+      } catch (UnsupportedEncodingException uee) {
+        throw new IllegalArgumentException("can't find " + utf8 + " encoding");
+      }
+    }
 
     private DataOutputStream out;
 
@@ -46,22 +60,39 @@
       this.out = out;
     }
 
+    /**
+     * Write the object to the byte stream, handling Text as a special
+     * case.
+     * @param o the object to print
+     * @throws IOException if the write throws, we pass it on
+     */
+    private void writeObject(Object o) throws IOException {
+      if (o instanceof Text) {
+        Text to = (Text) o;
+        out.write(to.getBytes(), 0, to.getLength());
+      } else {
+        out.write(o.toString().getBytes(utf8));
+      }
+    }
+
     public synchronized void write(K key, V value)
       throws IOException {
-      if (key == null && value == null) {
+      boolean nullKey = key == null || key instanceof NullWritable;
+      boolean nullValue = value == null || value instanceof NullWritable;
+      if (nullKey && nullValue) {
         return;
       }
-      if (key != null) {
-        out.write(key.toString().getBytes("UTF-8"));
+      if (!nullKey) {
+        writeObject(key);
       }
-      if (key != null && value != null) {
-        out.write("\t".getBytes("UTF-8"));
+      if (!(nullKey || nullValue)) {
+        out.write(tab);
       }
-      if (value != null) {
-        out.write(value.toString().getBytes("UTF-8"));
+      if (!nullValue) {
+        writeObject(value);
      }
-      out.writeByte('\n');
+      out.write(newline);
     }
 
     public synchronized void close(Reporter reporter) throws IOException {
@@ -82,7 +113,8 @@
       FSDataOutputStream fileOut = fs.create(new Path(dir, name), progress);
       return new LineRecordWriter<K, V>(fileOut);
     } else {
-      Class codecClass = getOutputCompressorClass(job, GzipCodec.class);
+      Class<? extends CompressionCodec> codecClass =
+        getOutputCompressorClass(job, GzipCodec.class);
       // create the named codec
       CompressionCodec codec = (CompressionCodec)
         ReflectionUtils.newInstance(codecClass, job);
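To make the new write() semantics concrete, this schematic sketch (raw types, as in the test below) shows what LineRecordWriter now emits for non-null Text objects k and v:

  writer.write(k, v);                   // k \t v \n
  writer.write(null, v);                // v \n
  writer.write(NullWritable.get(), v);  // v \n  (new: NullWritable key skipped)
  writer.write(k, null);                // k \n
  writer.write(k, NullWritable.get());  // k \n  (new: NullWritable value skipped)
  writer.write(null, null);             // nothing written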
Modified: lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/TestTextOutputFormat.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/TestTextOutputFormat.java?rev=607010&r1=607009&r2=607010&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/TestTextOutputFormat.java (original)
+++ lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/TestTextOutputFormat.java Wed Dec 26 23:02:12 2007
@@ -19,19 +19,12 @@
 package org.apache.hadoop.mapred;
 
 import java.io.*;
-import java.util.*;
 import junit.framework.TestCase;
 
-import org.apache.commons.logging.*;
 import org.apache.hadoop.fs.*;
 import org.apache.hadoop.io.*;
-import org.apache.hadoop.io.compress.*;
-import org.apache.hadoop.util.ReflectionUtils;
 
 public class TestTextOutputFormat extends TestCase {
-  private static final Log LOG = LogFactory.getLog(TestTextOutputFormat.class
-      .getName());
-
   private static JobConf defaultConf = new JobConf();
 
   private static FileSystem localFs = null;
@@ -43,9 +36,11 @@
     }
   }
 
-  private static Path workDir = new Path(new Path(System.getProperty(
-      "test.build.data", "."), "data"), "TestTextOutputFormat");
+  private static Path workDir =
+    new Path(new Path(System.getProperty("test.build.data", "."), "data"),
+             "TestTextOutputFormat");
 
+  @SuppressWarnings("unchecked")
   public void testFormat() throws Exception {
     JobConf job = new JobConf();
     job.setOutputPath(workDir);
@@ -54,19 +49,22 @@
     // A reporter that does nothing
     Reporter reporter = Reporter.NULL;
 
-    TextOutputFormat<Text, Text> theOutputFormat =
-      new TextOutputFormat<Text, Text>();
-    RecordWriter<Text, Text> theRecordWriter =
+    TextOutputFormat theOutputFormat = new TextOutputFormat();
+    RecordWriter theRecordWriter =
       theOutputFormat.getRecordWriter(localFs, job, file, reporter);
 
     Text key1 = new Text("key1");
     Text key2 = new Text("key2");
     Text val1 = new Text("val1");
     Text val2 = new Text("val2");
+    NullWritable nullWritable = NullWritable.get();
 
     try {
       theRecordWriter.write(key1, val1);
+      theRecordWriter.write(null, nullWritable);
       theRecordWriter.write(null, val1);
+      theRecordWriter.write(nullWritable, val2);
+      theRecordWriter.write(key2, nullWritable);
       theRecordWriter.write(key1, null);
       theRecordWriter.write(null, null);
       theRecordWriter.write(key2, val2);
@@ -78,6 +76,8 @@
     StringBuffer expectedOutput = new StringBuffer();
     expectedOutput.append(key1).append('\t').append(val1).append("\n");
     expectedOutput.append(val1).append("\n");
+    expectedOutput.append(val2).append("\n");
+    expectedOutput.append(key2).append("\n");
     expectedOutput.append(key1).append("\n");
     expectedOutput.append(key2).append('\t').append(val2).append("\n");
     String output = UtilsForTests.slurp(expectedFile);