Author: cutting
Date: Fri Jun 18 19:06:15 2010
New Revision: 956098
URL: http://svn.apache.org/viewvc?rev=956098&view=rev
Log:
AVRO-567. Add command-line tools for text file import and export. Contributed
by Patrick Wendell.
Added:
avro/trunk/lang/java/src/java/org/apache/avro/tool/FromTextTool.java
avro/trunk/lang/java/src/java/org/apache/avro/tool/ToTextTool.java
Modified:
avro/trunk/CHANGES.txt
avro/trunk/lang/java/ivy.xml
avro/trunk/lang/java/src/java/org/apache/avro/tool/Main.java
avro/trunk/lang/java/src/java/org/apache/avro/tool/Util.java
Modified: avro/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/avro/trunk/CHANGES.txt?rev=956098&r1=956097&r2=956098&view=diff
==============================================================================
--- avro/trunk/CHANGES.txt (original)
+++ avro/trunk/CHANGES.txt Fri Jun 18 19:06:15 2010
@@ -16,6 +16,9 @@ Avro 1.4.0 (unreleased)
AVRO-577. Java: Add MapReduce InputFormat for plain text files.
(Tom White via cutting)
+ AVRO-567. Add command-line tools for text file import & export.
+ (Patrick Wendell via cutting)
+
IMPROVEMENTS
AVRO-501. missing function in C api to access array elements after
Modified: avro/trunk/lang/java/ivy.xml
URL:
http://svn.apache.org/viewvc/avro/trunk/lang/java/ivy.xml?rev=956098&r1=956097&r2=956098&view=diff
==============================================================================
--- avro/trunk/lang/java/ivy.xml (original)
+++ avro/trunk/lang/java/ivy.xml Fri Jun 18 19:06:15 2010
@@ -62,7 +62,7 @@
<dependency org="net.sf.jopt-simple" name="jopt-simple" rev="3.2"
conf="build->default;test->default;tools->default"/>
<dependency org="org.apache.hadoop" name="hadoop-core" rev="0.20.2"
- conf="build->default;tools->default" transitive="false"/>
+ conf="build->default;test->default;tools->default"
transitive="false"/>
<dependency org="commons-httpclient" name="commons-httpclient" rev="3.0.1"
conf="test->default;tools->default"/>
</dependencies>
Added: avro/trunk/lang/java/src/java/org/apache/avro/tool/FromTextTool.java
URL:
http://svn.apache.org/viewvc/avro/trunk/lang/java/src/java/org/apache/avro/tool/FromTextTool.java?rev=956098&view=auto
==============================================================================
--- avro/trunk/lang/java/src/java/org/apache/avro/tool/FromTextTool.java (added)
+++ avro/trunk/lang/java/src/java/org/apache/avro/tool/FromTextTool.java Fri
Jun 18 19:06:15 2010
@@ -0,0 +1,99 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro.tool;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
+import java.io.BufferedReader;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.PrintStream;
+import java.nio.ByteBuffer;
+import java.util.List;
+
+import joptsimple.OptionParser;
+import joptsimple.OptionSet;
+import joptsimple.OptionSpec;
+
+import org.apache.avro.Schema;
+import org.apache.avro.file.CodecFactory;
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.generic.GenericDatumWriter;
+
+/** Reads a text file into an Avro data file.
+ *
+ * Can accept a file name, and HDFS file URI, or stdin. Can write to a file
+ * name, an HDFS URI, or stdout.*/
+public class FromTextTool implements Tool {
+ private static final String TEXT_FILE_SCHEMA =
+ "\"bytes\"";
+
+ @Override
+ public String getName() {
+ return "fromtext";
+ }
+
+ @Override
+ public String getShortDescription() {
+ return "Imports a text file into an avro data file.";
+ }
+
+ @Override
+ public int run(InputStream stdin, PrintStream out, PrintStream err,
+ List<String> args) throws Exception {
+
+ OptionParser p = new OptionParser();
+ OptionSpec<Integer> level = p.accepts("level", "compression level")
+ .withOptionalArg().ofType(Integer.class);
+
+ OptionSet opts = p.parse(args.toArray(new String[0]));
+
+ if (opts.nonOptionArguments().size() != 2) {
+ err.println("Expected 2 args: from_file to_file (local filenames," +
+ " Hadoop URI's, or '-' for stdin/stdout");
+ p.printHelpOn(err);
+ return 1;
+ }
+
+ BufferedInputStream inStream = Util.fileOrStdin(args.get(0), stdin);
+ BufferedOutputStream outStream = Util.fileOrStdout(args.get(1), out);
+
+ int compressionLevel = 1; // Default compression level
+ if (opts.hasArgument(level)) {
+ compressionLevel = level.value(opts);
+ }
+
+ BufferedReader reader = new BufferedReader(new
InputStreamReader(inStream));
+ DataFileWriter<ByteBuffer> writer =
+ new DataFileWriter<ByteBuffer>(new GenericDatumWriter<ByteBuffer>());
+ writer.setCodec(CodecFactory.deflateCodec(compressionLevel));
+ writer.create(Schema.parse(TEXT_FILE_SCHEMA), outStream);
+
+ String line;
+ while((line = reader.readLine()) != null) {
+ ByteBuffer buff = ByteBuffer.wrap(line.getBytes());
+ writer.append(buff);
+ }
+
+ writer.flush();
+ writer.close();
+ inStream.close();
+ return 0;
+ }
+
+}
Modified: avro/trunk/lang/java/src/java/org/apache/avro/tool/Main.java
URL:
http://svn.apache.org/viewvc/avro/trunk/lang/java/src/java/org/apache/avro/tool/Main.java?rev=956098&r1=956097&r2=956098&view=diff
==============================================================================
--- avro/trunk/lang/java/src/java/org/apache/avro/tool/Main.java (original)
+++ avro/trunk/lang/java/src/java/org/apache/avro/tool/Main.java Fri Jun 18
19:06:15 2010
@@ -23,9 +23,9 @@ import java.util.TreeMap;
import java.io.InputStream;
+import org.apache.avro.mapred.tether.TetherJob;
import org.apache.avro.reflect.InduceSchemaTool;
import org.apache.avro.specific.SpecificCompiler.SpecificCompilerTool;
-import org.apache.avro.mapred.tether.TetherJob;
/** Command-line driver.*/
public class Main {
@@ -49,6 +49,8 @@ public class Main {
new GenAvroTool(),
new RpcReceiveTool(),
new RpcSendTool(),
+ new FromTextTool(),
+ new ToTextTool(),
new TetherJob()
}) {
Tool prev = tools.put(tool.getName(), tool);
Added: avro/trunk/lang/java/src/java/org/apache/avro/tool/ToTextTool.java
URL:
http://svn.apache.org/viewvc/avro/trunk/lang/java/src/java/org/apache/avro/tool/ToTextTool.java?rev=956098&view=auto
==============================================================================
--- avro/trunk/lang/java/src/java/org/apache/avro/tool/ToTextTool.java (added)
+++ avro/trunk/lang/java/src/java/org/apache/avro/tool/ToTextTool.java Fri Jun
18 19:06:15 2010
@@ -0,0 +1,88 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro.tool;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
+import java.io.InputStream;
+import java.io.PrintStream;
+import java.nio.ByteBuffer;
+import java.util.List;
+
+import joptsimple.OptionParser;
+import joptsimple.OptionSet;
+
+import org.apache.avro.Schema;
+import org.apache.avro.file.DataFileStream;
+import org.apache.avro.generic.GenericDatumReader;
+
+/** Reads an avro data file into a plain text file. */
+public class ToTextTool implements Tool {
+ private static final String TEXT_FILE_SCHEMA =
+ "\"bytes\"";
+ private static final byte[] LINE_SEPERATOR =
+ System.getProperty("line.separator").getBytes();
+
+ @Override
+ public String getName() {
+ return "totext";
+ }
+
+ @Override
+ public String getShortDescription() {
+ return "Converts and avro file to a text file.";
+ }
+
+ @Override
+ public int run(InputStream stdin, PrintStream out, PrintStream err,
+ List<String> args) throws Exception {
+
+ OptionParser p = new OptionParser();
+ OptionSet opts = p.parse(args.toArray(new String[0]));
+ if (opts.nonOptionArguments().size() != 2) {
+ err.println("Expected 2 args: from_file to_file (local filenames," +
+ " Hadoop URI's, or '-' for stdin/stdout");
+ p.printHelpOn(err);
+ return 1;
+ }
+
+ BufferedInputStream inStream = Util.fileOrStdin(args.get(0), stdin);
+ BufferedOutputStream outStream = Util.fileOrStdout(args.get(1), out);
+
+ GenericDatumReader<Object> reader = new GenericDatumReader<Object>();
+ DataFileStream<Object> fileReader =
+ new DataFileStream<Object>(inStream, reader);
+
+ if (!fileReader.getSchema().equals(Schema.parse(TEXT_FILE_SCHEMA))) {
+ err.println("Avro file is not generic text schema");
+ p.printHelpOn(err);
+ return 1;
+ }
+
+ while (fileReader.hasNext()) {
+ ByteBuffer outBuff = (ByteBuffer) fileReader.next();
+ outStream.write(outBuff.array());
+ outStream.write(LINE_SEPERATOR);
+ }
+
+ outStream.close();
+ inStream.close();
+ return 0;
+ }
+
+}
Modified: avro/trunk/lang/java/src/java/org/apache/avro/tool/Util.java
URL:
http://svn.apache.org/viewvc/avro/trunk/lang/java/src/java/org/apache/avro/tool/Util.java?rev=956098&r1=956097&r2=956098&view=diff
==============================================================================
--- avro/trunk/lang/java/src/java/org/apache/avro/tool/Util.java (original)
+++ avro/trunk/lang/java/src/java/org/apache/avro/tool/Util.java Fri Jun 18
19:06:15 2010
@@ -17,29 +17,77 @@
*/
package org.apache.avro.tool;
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.io.OutputStream;
+import java.net.URI;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.io.JsonDecoder;
import org.apache.avro.file.DataFileReader;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
/** Static utility methods for tools. */
class Util {
/**
- * Returns stdin if filename is "-", else opens the file
+ * Returns stdin if filename is "-", else opens the local or HDFS file
* and returns an InputStream for it.
+ * @throws IOException
*/
- static InputStream fileOrStdin(String filename, InputStream stdin)
- throws FileNotFoundException {
+  static BufferedInputStream fileOrStdin(String filename, InputStream stdin)
+      throws IOException {
     if (filename.equals("-")) {
-      return stdin;
-    } else {
-      return new FileInputStream(new File(filename));
+      return new BufferedInputStream(stdin);
+    }
+    else if (filename.startsWith("hdfs:")) {
+      // Only names carrying the hdfs scheme are handed to Hadoop.
+      FileSystem fs = FileSystem.get(
+          URI.create(filename), new Configuration());
+      return new BufferedInputStream(fs.open(new Path(filename)));
+    }
+    else if (filename.indexOf("://") >= 0) {
+      // Some other URI scheme: report which name was rejected.
+      throw new FileNotFoundException(
+          "Unsupported URI scheme in file name: " + filename);
+    }
+    else {
+      // Everything else is a local path. Testing for a URI scheme rather
+      // than splitting on ':' keeps local names that contain a colon
+      // (for example Windows drive letters) usable.
+      return new BufferedInputStream(
+          new FileInputStream(new File(filename)));
+    }
+  }
+
+  /**
+   * Returns stdout if filename is "-", else opens the local or HDFS file
+   * and returns an OutputStream for it. Names starting with "hdfs:" are
+   * created through the Hadoop FileSystem API; names containing another
+   * "scheme://" prefix are rejected; everything else is a local path.
+   * @param filename "-", a local path, or an hdfs: URI
+   * @param stdout stream to wrap when filename is "-"
+   * @return a buffered stream over the selected destination
+   * @throws IOException if the destination cannot be opened, or if
+   *         filename uses an unsupported URI scheme
+   */
+  static BufferedOutputStream fileOrStdout(String filename,
+      OutputStream stdout) throws IOException {
+    if (filename.equals("-")) {
+      return new BufferedOutputStream(stdout);
+    }
+    else if (filename.startsWith("hdfs:")) {
+      FileSystem fs = FileSystem.get(
+          URI.create(filename), new Configuration());
+      return new BufferedOutputStream(fs.create(new Path(filename)));
+    }
+    else if (filename.indexOf("://") >= 0) {
+      // Some other URI scheme: report which name was rejected.
+      throw new FileNotFoundException(
+          "Unsupported URI scheme in file name: " + filename);
+    }
+    else {
+      // Testing for a URI scheme rather than splitting on ':' keeps local
+      // names that contain a colon (for example Windows drive letters)
+      // usable.
+      return new BufferedOutputStream(
+          new FileOutputStream(new File(filename)));
+    }
   }