Author: omalley
Date: Wed Dec 26 23:02:12 2007
New Revision: 607010

URL: http://svn.apache.org/viewvc?rev=607010&view=rev
Log:
HADOOP-2425. Special case TextOutputFormat to specifically handle Text and NullWritable.
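For context, here is a minimal sketch (not part of this commit; the class name is hypothetical) of the usage this change enables: a reducer that emits NullWritable keys, so that TextOutputFormat writes value-only lines with no key and no tab separator.

  import java.io.IOException;
  import java.util.Iterator;

  import org.apache.hadoop.io.NullWritable;
  import org.apache.hadoop.io.Text;
  import org.apache.hadoop.mapred.MapReduceBase;
  import org.apache.hadoop.mapred.OutputCollector;
  import org.apache.hadoop.mapred.Reducer;
  import org.apache.hadoop.mapred.Reporter;

  // Hypothetical reducer relying on the new behavior: NullWritable keys
  // are now treated like null, so each output line is just the value.
  public class ValueOnlyReducer extends MapReduceBase
      implements Reducer<Text, Text, NullWritable, Text> {
    public void reduce(Text key, Iterator<Text> values,
                       OutputCollector<NullWritable, Text> output,
                       Reporter reporter) throws IOException {
      while (values.hasNext()) {
        output.collect(NullWritable.get(), values.next());
      }
    }
  }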
Modified:
    lucene/hadoop/trunk/CHANGES.txt
    lucene/hadoop/trunk/src/java/org/apache/hadoop/io/NullWritable.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TextOutputFormat.java
    lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/TestTextOutputFormat.java

Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?rev=607010&r1=607009&r2=607010&view=diff
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Wed Dec 26 23:02:12 2007
@@ -140,6 +140,10 @@
     non-static method to allow sub-classes to provide alternate
     implementations. (Alejandro Abdelnur via acmurthy)
 
+    HADOOP-2425. Change TextOutputFormat to handle Text specifically for better
+    performance. Make NullWritable implement Comparable. Make TextOutputFormat
+    treat NullWritable like null. (omalley)
+
   OPTIMIZATIONS
 
     HADOOP-1898. Release the lock protecting the last time of the last stack

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/io/NullWritable.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/io/NullWritable.java?rev=607010&r1=607009&r2=607010&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/io/NullWritable.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/io/NullWritable.java Wed Dec 26 23:02:12 2007
@@ -21,7 +21,7 @@
 import java.io.*;
 
 /** Singleton Writable with no data. */
-public class NullWritable implements Writable {
+public class NullWritable implements WritableComparable {
 
   private static final NullWritable THIS = new NullWritable();
 
@@ -34,6 +34,15 @@
     return "(null)";
   }
 
+  public int hashCode() { return 0; }
+  public int compareTo(Object other) {
+    if (!(other instanceof NullWritable)) {
+      throw new ClassCastException("can't compare " + other.getClass().getName() +
+                                   " to NullWritable");
+    }
+    return 0;
+  }
+
   public boolean equals(Object other) { return other instanceof NullWritable; }
   public void readFields(DataInput in) throws IOException {}
   public void write(DataOutput out) throws IOException {}
 }
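Implementing WritableComparable (rather than just Writable) is what lets NullWritable be declared as an output key type: LineRecordWriter bounds its key parameter by WritableComparable, and map output keys must be comparable so the framework can sort them. A sketch of a job setup that depends on this, assuming a hypothetical driver class MyJob:

  // Hypothetical configuration; valid only now that NullWritable is a
  // WritableComparable and can serve as an output key class.
  JobConf job = new JobConf(MyJob.class);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(Text.class);
  job.setOutputFormat(TextOutputFormat.class);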
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TextOutputFormat.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TextOutputFormat.java?rev=607010&r1=607009&r2=607010&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TextOutputFormat.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TextOutputFormat.java Wed Dec 26 23:02:12 2007
@@ -20,11 +20,14 @@
 
 import java.io.DataOutputStream;
 import java.io.IOException;
+import java.io.UnsupportedEncodingException;
 
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.compress.CompressionCodec;
@@ -39,6 +42,17 @@
   protected static class LineRecordWriter<K extends WritableComparable,
                                           V extends Writable>
     implements RecordWriter<K, V> {
+    private static final String utf8 = "UTF-8";
+    private static final byte[] tab;
+    private static final byte[] newline;
+    static {
+      try {
+        tab = "\t".getBytes(utf8);
+        newline = "\n".getBytes(utf8);
+      } catch (UnsupportedEncodingException uee) {
+        throw new IllegalArgumentException("can't find " + utf8 + " encoding");
+      }
+    }
 
     private DataOutputStream out;
 
@@ -46,22 +60,39 @@
       this.out = out;
     }
 
+    /**
+     * Write the object to the byte stream, handling Text as a special
+     * case.
+     * @param o the object to print
+     * @throws IOException if the write throws, we pass it on
+     */
+    private void writeObject(Object o) throws IOException {
+      if (o instanceof Text) {
+        Text to = (Text) o;
+        out.write(to.getBytes(), 0, to.getLength());
+      } else {
+        out.write(o.toString().getBytes(utf8));
+      }
+    }
+
     public synchronized void write(K key, V value)
       throws IOException {
-      if (key == null && value == null) {
+      boolean nullKey = key == null || key instanceof NullWritable;
+      boolean nullValue = value == null || value instanceof NullWritable;
+      if (nullKey && nullValue) {
         return;
       }
-      if (key != null) {
-        out.write(key.toString().getBytes("UTF-8"));
+      if (!nullKey) {
+        writeObject(key);
       }
-      if (key != null && value != null) {
-        out.write("\t".getBytes("UTF-8"));
+      if (!(nullKey || nullValue)) {
+        out.write(tab);
       }
-      if (value != null) {
-        out.write(value.toString().getBytes("UTF-8"));
+      if (!nullValue) {
+        writeObject(value);
      }
-      out.writeByte('\n');
+      out.write(newline);
     }
 
     public synchronized void close(Reporter reporter) throws IOException {
@@ -82,7 +113,8 @@
       FSDataOutputStream fileOut = fs.create(new Path(dir, name), progress);
       return new LineRecordWriter<K, V>(fileOut);
     } else {
-      Class codecClass = getOutputCompressorClass(job, GzipCodec.class);
+      Class<? extends CompressionCodec> codecClass =
+        getOutputCompressorClass(job, GzipCodec.class);
       // create the named codec
       CompressionCodec codec = (CompressionCodec)
         ReflectionUtils.newInstance(codecClass, job);
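To make the new write() semantics concrete, this schematic sketch (raw types, as in the test below) shows what LineRecordWriter now emits for non-null Text objects k and v:

  writer.write(k, v);                   // k \t v \n
  writer.write(null, v);                // v \n
  writer.write(NullWritable.get(), v);  // v \n  (new: NullWritable key skipped)
  writer.write(k, null);                // k \n
  writer.write(k, NullWritable.get());  // k \n  (new: NullWritable value skipped)
  writer.write(null, null);             // nothing written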
Modified: lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/TestTextOutputFormat.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/TestTextOutputFormat.java?rev=607010&r1=607009&r2=607010&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/TestTextOutputFormat.java (original)
+++ lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/TestTextOutputFormat.java Wed Dec 26 23:02:12 2007
@@ -19,19 +19,12 @@
 package org.apache.hadoop.mapred;
 
 import java.io.*;
-import java.util.*;
 import junit.framework.TestCase;
 
-import org.apache.commons.logging.*;
 import org.apache.hadoop.fs.*;
 import org.apache.hadoop.io.*;
-import org.apache.hadoop.io.compress.*;
-import org.apache.hadoop.util.ReflectionUtils;
 
 public class TestTextOutputFormat extends TestCase {
-  private static final Log LOG = LogFactory.getLog(TestTextOutputFormat.class
-      .getName());
-
   private static JobConf defaultConf = new JobConf();
 
   private static FileSystem localFs = null;
@@ -43,9 +36,11 @@
     }
   }
 
-  private static Path workDir = new Path(new Path(System.getProperty(
-      "test.build.data", "."), "data"), "TestTextOutputFormat");
+  private static Path workDir =
+    new Path(new Path(System.getProperty("test.build.data", "."), "data"),
+             "TestTextOutputFormat");
 
+  @SuppressWarnings("unchecked")
   public void testFormat() throws Exception {
     JobConf job = new JobConf();
     job.setOutputPath(workDir);
@@ -54,19 +49,22 @@
     // A reporter that does nothing
     Reporter reporter = Reporter.NULL;
 
-    TextOutputFormat<Text, Text> theOutputFormat =
-      new TextOutputFormat<Text, Text>();
-    RecordWriter<Text, Text> theRecordWriter =
+    TextOutputFormat theOutputFormat = new TextOutputFormat();
+    RecordWriter theRecordWriter =
       theOutputFormat.getRecordWriter(localFs, job, file, reporter);
 
     Text key1 = new Text("key1");
     Text key2 = new Text("key2");
     Text val1 = new Text("val1");
     Text val2 = new Text("val2");
+    NullWritable nullWritable = NullWritable.get();
 
     try {
       theRecordWriter.write(key1, val1);
+      theRecordWriter.write(null, nullWritable);
       theRecordWriter.write(null, val1);
+      theRecordWriter.write(nullWritable, val2);
+      theRecordWriter.write(key2, nullWritable);
       theRecordWriter.write(key1, null);
       theRecordWriter.write(null, null);
       theRecordWriter.write(key2, val2);
@@ -78,6 +76,8 @@
     StringBuffer expectedOutput = new StringBuffer();
     expectedOutput.append(key1).append('\t').append(val1).append("\n");
     expectedOutput.append(val1).append("\n");
+    expectedOutput.append(val2).append("\n");
+    expectedOutput.append(key2).append("\n");
     expectedOutput.append(key1).append("\n");
     expectedOutput.append(key2).append('\t').append(val2).append("\n");
     String output = UtilsForTests.slurp(expectedFile);