Author: cutting
Date: Fri Jun 18 19:06:15 2010
New Revision: 956098

URL: http://svn.apache.org/viewvc?rev=956098&view=rev
Log:
AVRO-567.  Add command-line tools for text file import and export.  Contributed 
by Patrick Wendell.

Added:
    avro/trunk/lang/java/src/java/org/apache/avro/tool/FromTextTool.java
    avro/trunk/lang/java/src/java/org/apache/avro/tool/ToTextTool.java
Modified:
    avro/trunk/CHANGES.txt
    avro/trunk/lang/java/ivy.xml
    avro/trunk/lang/java/src/java/org/apache/avro/tool/Main.java
    avro/trunk/lang/java/src/java/org/apache/avro/tool/Util.java

Modified: avro/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/avro/trunk/CHANGES.txt?rev=956098&r1=956097&r2=956098&view=diff
==============================================================================
--- avro/trunk/CHANGES.txt (original)
+++ avro/trunk/CHANGES.txt Fri Jun 18 19:06:15 2010
@@ -16,6 +16,9 @@ Avro 1.4.0 (unreleased)
     AVRO-577. Java: Add MapReduce InputFormat for plain text files.
     (Tom White via cutting)
 
+    AVRO-567. Add command-line tools for text file import & export.
+    (Patrick Wendell via cutting)
+
   IMPROVEMENTS
 
     AVRO-501. missing function in C api to access array elements after 

Modified: avro/trunk/lang/java/ivy.xml
URL: 
http://svn.apache.org/viewvc/avro/trunk/lang/java/ivy.xml?rev=956098&r1=956097&r2=956098&view=diff
==============================================================================
--- avro/trunk/lang/java/ivy.xml (original)
+++ avro/trunk/lang/java/ivy.xml Fri Jun 18 19:06:15 2010
@@ -62,7 +62,7 @@
     <dependency org="net.sf.jopt-simple" name="jopt-simple" rev="3.2"
         conf="build->default;test->default;tools->default"/>
     <dependency org="org.apache.hadoop" name="hadoop-core" rev="0.20.2"
-                conf="build->default;tools->default" transitive="false"/>
+                conf="build->default;test->default;tools->default" 
transitive="false"/>
     <dependency org="commons-httpclient" name="commons-httpclient" rev="3.0.1"
                conf="test->default;tools->default"/>
   </dependencies>

Added: avro/trunk/lang/java/src/java/org/apache/avro/tool/FromTextTool.java
URL: 
http://svn.apache.org/viewvc/avro/trunk/lang/java/src/java/org/apache/avro/tool/FromTextTool.java?rev=956098&view=auto
==============================================================================
--- avro/trunk/lang/java/src/java/org/apache/avro/tool/FromTextTool.java (added)
+++ avro/trunk/lang/java/src/java/org/apache/avro/tool/FromTextTool.java Fri 
Jun 18 19:06:15 2010
@@ -0,0 +1,99 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro.tool;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
+import java.io.BufferedReader;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.PrintStream;
+import java.nio.ByteBuffer;
+import java.util.List;
+
+import joptsimple.OptionParser;
+import joptsimple.OptionSet;
+import joptsimple.OptionSpec;
+
+import org.apache.avro.Schema;
+import org.apache.avro.file.CodecFactory;
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.generic.GenericDatumWriter;
+
+/** Reads a text file into an Avro data file.
+ *
+ * Every line of the input is appended as one datum of schema "bytes" to a
+ * deflate-compressed Avro data file.
+ *
+ * Can accept a file name, an HDFS file URI, or stdin. Can write to a file
+ * name, an HDFS URI, or stdout.*/
+public class FromTextTool implements Tool {
+  /** JSON schema of the data file that is written: a plain "bytes" datum. */
+  private static final String TEXT_FILE_SCHEMA = 
+    "\"bytes\"";
+
+  @Override
+  public String getName() {
+    return "fromtext";
+  }
+
+  @Override
+  public String getShortDescription() {
+    return "Imports a text file into an avro data file.";
+  }
+
+  /**
+   * Converts the text input named by the first positional argument into an
+   * Avro data file named by the second one.
+   *
+   * @param stdin stream read when the input name is "-"
+   * @param out stream written when the output name is "-"
+   * @param err receives the usage message when the arguments are wrong
+   * @param args command line: [--level N] from_file to_file
+   * @return 0 on success, 1 when the number of positional arguments is not 2
+   */
+  @Override
+  public int run(InputStream stdin, PrintStream out, PrintStream err,
+      List<String> args) throws Exception {
+
+    OptionParser p = new OptionParser();
+    OptionSpec<Integer> level = p.accepts("level", "compression level")
+        .withOptionalArg().ofType(Integer.class);
+
+    OptionSet opts = p.parse(args.toArray(new String[0]));
+
+    // Take the file names from the parser's positional arguments. Indexing
+    // into the raw args would pick up "--level" (and its value) as a file
+    // name whenever the option is given.
+    List<?> files = opts.nonOptionArguments();
+    if (files.size() != 2) {
+      err.println("Expected 2 args: from_file to_file (local filenames," +
+          " Hadoop URI's, or '-' for stdin/stdout)");
+      p.printHelpOn(err);
+      return 1;
+    }
+
+    int compressionLevel = 1; // Default compression level
+    if (opts.hasArgument(level)) {
+      compressionLevel = level.value(opts);
+    }
+
+    BufferedInputStream inStream =
+        Util.fileOrStdin(String.valueOf(files.get(0)), stdin);
+    try {
+      BufferedOutputStream outStream =
+          Util.fileOrStdout(String.valueOf(files.get(1)), out);
+      boolean finished = false;
+      try {
+        // NOTE: lines are decoded and re-encoded with the platform default
+        // charset, and readLine() drops the line terminators.
+        BufferedReader reader =
+            new BufferedReader(new InputStreamReader(inStream));
+        DataFileWriter<ByteBuffer> writer =
+            new DataFileWriter<ByteBuffer>(
+                new GenericDatumWriter<ByteBuffer>());
+        writer.setCodec(CodecFactory.deflateCodec(compressionLevel));
+        writer.create(Schema.parse(TEXT_FILE_SCHEMA), outStream);
+
+        String line;
+        while ((line = reader.readLine()) != null) {
+          writer.append(ByteBuffer.wrap(line.getBytes()));
+        }
+
+        // As before, closing the writer is what flushes the last block and
+        // releases the output stream on the success path.
+        writer.close();
+        finished = true;
+      } finally {
+        if (!finished) {
+          // Failure before/while closing the writer: do not leak the output.
+          outStream.close();
+        }
+      }
+    } finally {
+      inStream.close();
+    }
+    return 0;
+  }
+
+}

Modified: avro/trunk/lang/java/src/java/org/apache/avro/tool/Main.java
URL: 
http://svn.apache.org/viewvc/avro/trunk/lang/java/src/java/org/apache/avro/tool/Main.java?rev=956098&r1=956097&r2=956098&view=diff
==============================================================================
--- avro/trunk/lang/java/src/java/org/apache/avro/tool/Main.java (original)
+++ avro/trunk/lang/java/src/java/org/apache/avro/tool/Main.java Fri Jun 18 
19:06:15 2010
@@ -23,9 +23,9 @@ import java.util.TreeMap;
 
 import java.io.InputStream;
 
+import org.apache.avro.mapred.tether.TetherJob;
 import org.apache.avro.reflect.InduceSchemaTool;
 import org.apache.avro.specific.SpecificCompiler.SpecificCompilerTool;
-import org.apache.avro.mapred.tether.TetherJob;
 
 /** Command-line driver.*/
 public class Main {
@@ -49,6 +49,8 @@ public class Main {
         new GenAvroTool(),
         new RpcReceiveTool(),
         new RpcSendTool(),
+        new FromTextTool(),
+        new ToTextTool(),
         new TetherJob()
         }) {
       Tool prev = tools.put(tool.getName(), tool);

Added: avro/trunk/lang/java/src/java/org/apache/avro/tool/ToTextTool.java
URL: 
http://svn.apache.org/viewvc/avro/trunk/lang/java/src/java/org/apache/avro/tool/ToTextTool.java?rev=956098&view=auto
==============================================================================
--- avro/trunk/lang/java/src/java/org/apache/avro/tool/ToTextTool.java (added)
+++ avro/trunk/lang/java/src/java/org/apache/avro/tool/ToTextTool.java Fri Jun 
18 19:06:15 2010
@@ -0,0 +1,88 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro.tool;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
+import java.io.InputStream;
+import java.io.PrintStream;
+import java.nio.ByteBuffer;
+import java.util.List;
+
+import joptsimple.OptionParser;
+import joptsimple.OptionSet;
+
+import org.apache.avro.Schema;
+import org.apache.avro.file.DataFileStream;
+import org.apache.avro.generic.GenericDatumReader;
+
+/**
+ * Reads an avro data file into a plain text file.
+ *
+ * The input must have the schema "bytes"; every datum is written out
+ * followed by the platform line separator.
+ */
+public class ToTextTool implements Tool {
+  /** JSON schema the input data file is required to have. */
+  private static final String TEXT_FILE_SCHEMA = 
+        "\"bytes\"";
+  /** Platform line separator, encoded with the platform default charset. */
+  private static final byte[] LINE_SEPARATOR = 
+        System.getProperty("line.separator").getBytes();
+
+  @Override
+  public String getName() {
+    return "totext";
+  }
+
+  @Override
+  public String getShortDescription() {
+    return "Converts an avro file to a text file.";
+  }
+
+  /**
+   * Writes every datum of the Avro data file named by the first positional
+   * argument as one line of the output named by the second one.
+   *
+   * @param stdin stream read when the input name is "-"
+   * @param out stream written when the output name is "-"
+   * @param err receives usage and schema-mismatch messages
+   * @param args command line: from_file to_file
+   * @return 0 on success, 1 on wrong argument count or unexpected schema
+   */
+  @Override
+  public int run(InputStream stdin, PrintStream out, PrintStream err,
+      List<String> args) throws Exception {
+
+    OptionParser p = new OptionParser();
+    OptionSet opts = p.parse(args.toArray(new String[0]));
+    // Read the file names from the parsed positional arguments, the same
+    // list whose size is validated below.
+    List<?> files = opts.nonOptionArguments();
+    if (files.size() != 2) {
+      err.println("Expected 2 args: from_file to_file (local filenames," +
+      " Hadoop URI's, or '-' for stdin/stdout)");
+      p.printHelpOn(err);
+      return 1;
+    }
+
+    BufferedInputStream inStream =
+        Util.fileOrStdin(String.valueOf(files.get(0)), stdin);
+    try {
+      GenericDatumReader<Object> reader = new GenericDatumReader<Object>();
+      DataFileStream<Object> fileReader =
+        new DataFileStream<Object>(inStream, reader);
+
+      // Validate the input before opening the output, so that a rejected
+      // input does not leave an empty output file behind.
+      if (!fileReader.getSchema().equals(Schema.parse(TEXT_FILE_SCHEMA))) {
+        err.println("Avro file is not generic text schema");
+        p.printHelpOn(err);
+        return 1;
+      }
+
+      BufferedOutputStream outStream =
+          Util.fileOrStdout(String.valueOf(files.get(1)), out);
+      try {
+        while (fileReader.hasNext()) {
+          ByteBuffer outBuff = (ByteBuffer) fileReader.next();
+          // Write only the readable region: array() alone exposes the whole
+          // backing array regardless of the buffer's position and limit.
+          outStream.write(outBuff.array(),
+              outBuff.arrayOffset() + outBuff.position(),
+              outBuff.remaining());
+          outStream.write(LINE_SEPARATOR);
+        }
+      } finally {
+        // close() also flushes whatever is still buffered.
+        outStream.close();
+      }
+    } finally {
+      inStream.close();
+    }
+    return 0;
+  }
+
+}

Modified: avro/trunk/lang/java/src/java/org/apache/avro/tool/Util.java
URL: 
http://svn.apache.org/viewvc/avro/trunk/lang/java/src/java/org/apache/avro/tool/Util.java?rev=956098&r1=956097&r2=956098&view=diff
==============================================================================
--- avro/trunk/lang/java/src/java/org/apache/avro/tool/Util.java (original)
+++ avro/trunk/lang/java/src/java/org/apache/avro/tool/Util.java Fri Jun 18 
19:06:15 2010
@@ -17,29 +17,77 @@
  */
 package org.apache.avro.tool;
 
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.OutputStream;
+import java.net.URI;
 
 import org.apache.avro.Schema;
 import org.apache.avro.generic.GenericDatumReader;
 import org.apache.avro.io.JsonDecoder;
 import org.apache.avro.file.DataFileReader;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
 
 /** Static utility methods for tools. */
 class Util {
   /**
-   * Returns stdin if filename is "-", else opens the file
+   * Returns stdin if filename is "-", else opens the local or HDFS file
    * and returns an InputStream for it.
+   * @throws IOException 
    */
-  static InputStream fileOrStdin(String filename, InputStream stdin) 
-      throws FileNotFoundException {
+  static BufferedInputStream fileOrStdin(String filename, InputStream stdin) 
+      throws IOException {
     if (filename.equals("-")) {
-      return stdin;
-    } else {
-      return new FileInputStream(new File(filename));
+      return new BufferedInputStream(stdin);
+    } 
+    else {
+      String[] parts = filename.split(":");
+      if (parts.length == 1) {
+        return new BufferedInputStream(new FileInputStream(new 
File(filename)));
+      }
+      else if (parts[0].equals("hdfs")) {
+        FileSystem fs = FileSystem.get(
+            URI.create(filename), new Configuration());
+        return new BufferedInputStream(fs.open(new Path(filename)));
+      }
+      else {
+        throw new FileNotFoundException();
+      }
+    }
+  }
+  
+  /**
+   * Returns stdout if filename is "-", else opens the local or HDFS file
+   * and returns an OutputStream for it.
+   *
+   * The result is always wrapped in a BufferedOutputStream. A name without
+   * any ':' is opened as a local file; a name whose text before the first
+   * ':' is "hdfs" is created through the Hadoop FileSystem API.
+   *
+   * @param filename "-", a local file name, or an hdfs: URI
+   * @param stdout stream to wrap when filename is "-"
+   * @return a buffered stream writing to the selected destination
+   * @throws IOException if the destination cannot be opened; in particular
+   *         a FileNotFoundException (without message) for any name that
+   *         contains ':' but does not start with "hdfs"
+   */
+  static BufferedOutputStream fileOrStdout(String filename, OutputStream 
stdout) 
+  throws IOException {
+    if (filename.equals("-")) {
+      return new BufferedOutputStream(stdout);
+    } 
+    else {
+      // The part before the first ':' is treated as a URI scheme.
+      String[] parts = filename.split(":");
+      if (parts.length == 1) {
+        // No ':' at all: plain local file name.
+        return new BufferedOutputStream(
+            new FileOutputStream(new File(filename)));
+      }
+      else if (parts[0].equals("hdfs")) {
+        FileSystem fs = FileSystem.get(
+            URI.create(filename), new Configuration());
+        return new BufferedOutputStream(fs.create(new Path(filename)));
+      }
+      else {
+        // NOTE(review): every other name containing ':' ends up here, which
+        // presumably includes Windows drive paths such as C:\dir\file and
+        // other URI schemes - confirm whether that is intended.
+        throw new FileNotFoundException();
+      }
     }
   }
   


Reply via email to