Author: cutting
Date: Tue Apr 19 20:48:37 2011
New Revision: 1095207

URL: http://svn.apache.org/viewvc?rev=1095207&view=rev
Log:
AVRO-799. Java: Add support for --codec parameter to the 'fromtext' command.

Added:
    avro/trunk/lang/java/tools/src/test/java/org/apache/avro/tool/TestTextFileTools.java
Modified:
    avro/trunk/CHANGES.txt
    avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/FromTextTool.java
    avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/ToTextTool.java

Modified: avro/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/avro/trunk/CHANGES.txt?rev=1095207&r1=1095206&r2=1095207&view=diff
==============================================================================
--- avro/trunk/CHANGES.txt (original)
+++ avro/trunk/CHANGES.txt Tue Apr 19 20:48:37 2011
@@ -46,6 +46,10 @@ Avro 1.5.1 (unreleased)
     AVRO-802. Java: Add documentation for non-Avro input, map-only
     jobs. (cutting)
 
+    AVRO-799. Java: Add support for --codec parameter to the
+    'fromtext' command.  Also made some performance improvements, bug
+    fixes and added tests for this command. (cutting)
+
   BUG FIXES
 
     AVRO-786. Java: Fix equals() to work on objects containing maps. (cutting)

Modified: avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/FromTextTool.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/FromTextTool.java?rev=1095207&r1=1095206&r2=1095207&view=diff
==============================================================================
--- avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/FromTextTool.java (original)
+++ avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/FromTextTool.java Tue Apr 19 20:48:37 2011
@@ -19,9 +19,7 @@ package org.apache.avro.tool;
 
 import java.io.BufferedInputStream;
 import java.io.BufferedOutputStream;
-import java.io.BufferedReader;
 import java.io.InputStream;
-import java.io.InputStreamReader;
 import java.io.PrintStream;
 import java.nio.ByteBuffer;
 import java.util.List;
@@ -34,6 +32,7 @@ import org.apache.avro.Schema;
 import org.apache.avro.file.CodecFactory;
 import org.apache.avro.file.DataFileWriter;
 import org.apache.avro.generic.GenericDatumWriter;
+import static org.apache.avro.file.DataFileConstants.DEFLATE_CODEC;
 
 /** Reads a text file into an Avro data file.
  * 
@@ -61,36 +60,68 @@ public class FromTextTool implements Too
     OptionSpec<Integer> level = p.accepts("level", "compression level")
     .withOptionalArg().ofType(Integer.class);
 
+    OptionSpec<String> codec = p.accepts("codec", "compression codec")
+    .withOptionalArg().ofType(String.class);
+
     OptionSet opts = p.parse(args.toArray(new String[0]));
 
-    if (opts.nonOptionArguments().size() != 2) {
+    List<String> nargs = opts.nonOptionArguments();
+    if (nargs.size() != 2) {
       err.println("Expected 2 args: from_file to_file (local filenames," +
           " Hadoop URI's, or '-' for stdin/stdout");
       p.printHelpOn(err);
       return 1;
     }
  
-    BufferedInputStream inStream = Util.fileOrStdin(args.get(0), stdin);
-    BufferedOutputStream outStream = Util.fileOrStdout(args.get(1), out);
-    
     int compressionLevel = 1; // Default compression level
     if (opts.hasArgument(level)) {
       compressionLevel = level.value(opts);
     }
   
-    BufferedReader reader = new BufferedReader(new InputStreamReader(inStream));
+    String codecName = opts.hasArgument(codec)
+      ? codec.value(opts)
+      : DEFLATE_CODEC;
+    CodecFactory codecFactory = codecName.equals(DEFLATE_CODEC)
+      ? CodecFactory.deflateCodec(compressionLevel)
+      : CodecFactory.fromString(codecName);
+
+    BufferedInputStream inStream = Util.fileOrStdin(nargs.get(0), stdin);
+    BufferedOutputStream outStream = Util.fileOrStdout(nargs.get(1), out);
+    
     DataFileWriter<ByteBuffer> writer =
         new DataFileWriter<ByteBuffer>(new GenericDatumWriter<ByteBuffer>());
-    writer.setCodec(CodecFactory.deflateCodec(compressionLevel));
+    writer.setCodec(codecFactory);
     writer.create(Schema.parse(TEXT_FILE_SCHEMA), outStream);
 
-    String line;
-    while((line = reader.readLine()) != null) {
-      ByteBuffer buff = ByteBuffer.wrap(line.getBytes());
-      writer.append(buff);
+    ByteBuffer line = ByteBuffer.allocate(128);
+    boolean returnSeen = false;
+    byte[] buf = new byte[8192];
+    for (int end = inStream.read(buf); end != -1; end = inStream.read(buf)) {
+      for (int i = 0; i < end; i++) {
+        int b = buf[i] & 0xFF;
+        if (b == '\n') {                          // newline
+          System.out.println("Writing line = "+line.position());
+          line.flip();
+          writer.append(line);
+          line.clear();
+          returnSeen = false;
+        } else if (b == '\r') {                   // return
+          line.flip();
+          writer.append(line);
+          line.clear();
+          returnSeen = true;
+        } else {
+          if (line.position() == line.limit()) {    // reallocate longer line
+            ByteBuffer tempLine = ByteBuffer.allocate(line.limit()*2);
+            line.flip();
+            tempLine.put(line);
+            line = tempLine;
+          }
+          line.put((byte)b);
+          returnSeen = false;
+        }
+      }
     }
-    
-    writer.flush();
     writer.close();
     inStream.close();
     return 0;

Modified: avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/ToTextTool.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/ToTextTool.java?rev=1095207&r1=1095206&r2=1095207&view=diff
==============================================================================
--- avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/ToTextTool.java (original)
+++ avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/ToTextTool.java Tue Apr 19 20:48:37 2011
@@ -35,7 +35,7 @@ import org.apache.avro.generic.GenericDa
 public class ToTextTool implements Tool {
   private static final String TEXT_FILE_SCHEMA = 
         "\"bytes\"";
-  private static final byte[] LINE_SEPERATOR = 
+  private static final byte[] LINE_SEPARATOR = 
         System.getProperty("line.separator").getBytes();
     
   @Override
@@ -45,7 +45,7 @@ public class ToTextTool implements Tool 
 
   @Override
   public String getShortDescription() {
-    return "Converts and avro file to a text file.";
+    return "Converts an Avro data file to a text file.";
   }
 
   @Override
@@ -77,7 +77,7 @@ public class ToTextTool implements Tool 
     while (fileReader.hasNext()) {
       ByteBuffer outBuff = (ByteBuffer) fileReader.next();
       outStream.write(outBuff.array());
-      outStream.write(LINE_SEPERATOR);
+      outStream.write(LINE_SEPARATOR);
     }
     
     outStream.close();

Added: avro/trunk/lang/java/tools/src/test/java/org/apache/avro/tool/TestTextFileTools.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/tools/src/test/java/org/apache/avro/tool/TestTextFileTools.java?rev=1095207&view=auto
==============================================================================
--- avro/trunk/lang/java/tools/src/test/java/org/apache/avro/tool/TestTextFileTools.java (added)
+++ avro/trunk/lang/java/tools/src/test/java/org/apache/avro/tool/TestTextFileTools.java Tue Apr 19 20:48:37 2011
@@ -0,0 +1,144 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro.tool;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.fail;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.OutputStream;
+import java.io.InputStream;
+import java.io.FileOutputStream;
+import java.io.FileInputStream;
+import java.io.BufferedOutputStream;
+import java.io.BufferedInputStream;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.avro.AvroTestUtil;
+import org.apache.avro.Schema;
+import org.apache.avro.Schema.Type;
+import org.apache.avro.file.DataFileReader;
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.generic.GenericDatumReader;
+import org.junit.BeforeClass;
+import org.junit.AfterClass;
+import org.junit.Test;
+
+@SuppressWarnings("deprecation")
+public class TestTextFileTools {
+  private static final int COUNT =
+    Integer.parseInt(System.getProperty("test.count", "10"));
+
+  static File linesFile;
+  static ByteBuffer[] lines;
+  static Schema schema;
+  static File schemaFile;
+  
+  @BeforeClass
+  public static void writeRandomFile() throws IOException {
+    schema = Schema.create(Type.BYTES);
+    lines = new ByteBuffer[COUNT];
+    linesFile = AvroTestUtil.tempFile("random.lines");
+
+    OutputStream out =
+      new BufferedOutputStream(new FileOutputStream(linesFile));
+    Random rand = new Random();
+    for (int j = 0; j < COUNT; j++) {
+      byte[] line = new byte[rand.nextInt(512)];
+      System.out.println("Creating line = "+line.length);
+      for (int i = 0; i < line.length; i++) {
+        int b = rand.nextInt(256);
+        while (b == '\n' || b == '\r')
+          b = rand.nextInt(256);
+        line[i] = (byte)b;
+      }
+      out.write(line);
+      out.write('\n');
+      lines[j] = ByteBuffer.wrap(line);
+    }
+    out.close();
+  }
+  
+  private void fromText(String name, String... args) throws Exception {
+    File avroFile = AvroTestUtil.tempFile(name + ".avro");
+
+    ArrayList<String> arglist = new ArrayList<String>();
+    arglist.addAll(Arrays.asList(args));
+    arglist.add(linesFile.toString());
+    arglist.add(avroFile.toString());
+
+    new FromTextTool().run(null, null, null, arglist);
+    
+    // Read it back, and make sure it's valid.
+    DataFileReader<ByteBuffer> file = new DataFileReader<ByteBuffer>
+      (avroFile, new GenericDatumReader<ByteBuffer>());
+    int i = 0;
+    for (ByteBuffer line : file) {
+      System.out.println("Reading line = "+line.remaining());
+      assertEquals(line, lines[i]);
+      i++;
+    }
+    assertEquals(COUNT, i);
+  }
+  
+  @Test
+  public void testFromText() throws Exception {
+    fromText("null", "--codec", "null");
+    fromText("deflate", "--codec", "deflate");
+    fromText("snappy", "--codec", "snappy");
+  }
+
+  @AfterClass
+  public static void testToText() throws Exception {
+    toText("null");
+    toText("deflate");
+    toText("snappy");
+  }
+
+  private static void toText(String name) throws Exception {
+    File avroFile = AvroTestUtil.tempFile(name + ".avro");
+    File outFile = AvroTestUtil.tempFile(name + ".lines");
+
+    ArrayList<String> arglist = new ArrayList<String>();
+    arglist.add(avroFile.toString());
+    arglist.add(outFile.toString());
+
+    new ToTextTool().run(null, null, null, arglist);
+    
+    // Read it back, and make sure it's valid.
+    InputStream orig = new BufferedInputStream(new FileInputStream(linesFile));
+    InputStream after = new BufferedInputStream(new FileInputStream(outFile));
+
+    int b;
+    while ((b = orig.read()) != -1)
+      assertEquals(b, after.read());
+    assertEquals(-1, after.read());
+
+    orig.close();
+    after.close();
+  }
+
+}


Reply via email to