Author: cutting
Date: Tue Apr 19 20:48:37 2011
New Revision: 1095207
URL: http://svn.apache.org/viewvc?rev=1095207&view=rev
Log:
AVRO-799. Java: Add support for --codec parameter to the 'fromtext' command.
Added:
avro/trunk/lang/java/tools/src/test/java/org/apache/avro/tool/TestTextFileTools.java
Modified:
avro/trunk/CHANGES.txt
avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/FromTextTool.java
avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/ToTextTool.java
Modified: avro/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/avro/trunk/CHANGES.txt?rev=1095207&r1=1095206&r2=1095207&view=diff
==============================================================================
--- avro/trunk/CHANGES.txt (original)
+++ avro/trunk/CHANGES.txt Tue Apr 19 20:48:37 2011
@@ -46,6 +46,10 @@ Avro 1.5.1 (unreleased)
AVRO-802. Java: Add documentation for non-Avro input, map-only
jobs. (cutting)
+ AVRO-799. Java: Add support for --codec parameter to the
+ 'fromtext' command. Also made some performance improvements, bug
+ fixes and added tests for this command. (cutting)
+
BUG FIXES
AVRO-786. Java: Fix equals() to work on objects containing maps. (cutting)
Modified:
avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/FromTextTool.java
URL:
http://svn.apache.org/viewvc/avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/FromTextTool.java?rev=1095207&r1=1095206&r2=1095207&view=diff
==============================================================================
---
avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/FromTextTool.java
(original)
+++
avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/FromTextTool.java
Tue Apr 19 20:48:37 2011
@@ -19,9 +19,7 @@ package org.apache.avro.tool;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
-import java.io.BufferedReader;
import java.io.InputStream;
-import java.io.InputStreamReader;
import java.io.PrintStream;
import java.nio.ByteBuffer;
import java.util.List;
@@ -34,6 +32,7 @@ import org.apache.avro.Schema;
import org.apache.avro.file.CodecFactory;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericDatumWriter;
+import static org.apache.avro.file.DataFileConstants.DEFLATE_CODEC;
/** Reads a text file into an Avro data file.
*
@@ -61,36 +60,68 @@ public class FromTextTool implements Too
OptionSpec<Integer> level = p.accepts("level", "compression level")
.withOptionalArg().ofType(Integer.class);
+ OptionSpec<String> codec = p.accepts("codec", "compression codec")
+ .withOptionalArg().ofType(String.class);
+
OptionSet opts = p.parse(args.toArray(new String[0]));
- if (opts.nonOptionArguments().size() != 2) {
+ List<String> nargs = opts.nonOptionArguments();
+ if (nargs.size() != 2) {
err.println("Expected 2 args: from_file to_file (local filenames," +
" Hadoop URI's, or '-' for stdin/stdout");
p.printHelpOn(err);
return 1;
}
- BufferedInputStream inStream = Util.fileOrStdin(args.get(0), stdin);
- BufferedOutputStream outStream = Util.fileOrStdout(args.get(1), out);
-
int compressionLevel = 1; // Default compression level
if (opts.hasArgument(level)) {
compressionLevel = level.value(opts);
}
- BufferedReader reader = new BufferedReader(new
InputStreamReader(inStream));
+ String codecName = opts.hasArgument(codec)
+ ? codec.value(opts)
+ : DEFLATE_CODEC;
+ CodecFactory codecFactory = codecName.equals(DEFLATE_CODEC)
+ ? CodecFactory.deflateCodec(compressionLevel)
+ : CodecFactory.fromString(codecName);
+
+ BufferedInputStream inStream = Util.fileOrStdin(nargs.get(0), stdin);
+ BufferedOutputStream outStream = Util.fileOrStdout(nargs.get(1), out);
+
DataFileWriter<ByteBuffer> writer =
new DataFileWriter<ByteBuffer>(new GenericDatumWriter<ByteBuffer>());
- writer.setCodec(CodecFactory.deflateCodec(compressionLevel));
+ writer.setCodec(codecFactory);
writer.create(Schema.parse(TEXT_FILE_SCHEMA), outStream);
- String line;
- while((line = reader.readLine()) != null) {
- ByteBuffer buff = ByteBuffer.wrap(line.getBytes());
- writer.append(buff);
+ ByteBuffer line = ByteBuffer.allocate(128);
+ boolean returnSeen = false;
+ byte[] buf = new byte[8192];
+ for (int end = inStream.read(buf); end != -1; end = inStream.read(buf)) {
+ for (int i = 0; i < end; i++) {
+ int b = buf[i] & 0xFF;
+ if (b == '\n') { // newline
+ System.out.println("Writing line = "+line.position());
+ line.flip();
+ writer.append(line);
+ line.clear();
+ returnSeen = false;
+ } else if (b == '\r') { // return
+ line.flip();
+ writer.append(line);
+ line.clear();
+ returnSeen = true;
+ } else {
+ if (line.position() == line.limit()) { // reallocate longer line
+ ByteBuffer tempLine = ByteBuffer.allocate(line.limit()*2);
+ line.flip();
+ tempLine.put(line);
+ line = tempLine;
+ }
+ line.put((byte)b);
+ returnSeen = false;
+ }
+ }
}
-
- writer.flush();
writer.close();
inStream.close();
return 0;
Modified:
avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/ToTextTool.java
URL:
http://svn.apache.org/viewvc/avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/ToTextTool.java?rev=1095207&r1=1095206&r2=1095207&view=diff
==============================================================================
---
avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/ToTextTool.java
(original)
+++
avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/ToTextTool.java
Tue Apr 19 20:48:37 2011
@@ -35,7 +35,7 @@ import org.apache.avro.generic.GenericDa
public class ToTextTool implements Tool {
private static final String TEXT_FILE_SCHEMA =
"\"bytes\"";
- private static final byte[] LINE_SEPERATOR =
+ private static final byte[] LINE_SEPARATOR =
System.getProperty("line.separator").getBytes();
@Override
@@ -45,7 +45,7 @@ public class ToTextTool implements Tool
@Override
public String getShortDescription() {
- return "Converts and avro file to a text file.";
+ return "Converts an Avro data file to a text file.";
}
@Override
@@ -77,7 +77,7 @@ public class ToTextTool implements Tool
while (fileReader.hasNext()) {
ByteBuffer outBuff = (ByteBuffer) fileReader.next();
outStream.write(outBuff.array());
- outStream.write(LINE_SEPERATOR);
+ outStream.write(LINE_SEPARATOR);
}
outStream.close();
Added:
avro/trunk/lang/java/tools/src/test/java/org/apache/avro/tool/TestTextFileTools.java
URL:
http://svn.apache.org/viewvc/avro/trunk/lang/java/tools/src/test/java/org/apache/avro/tool/TestTextFileTools.java?rev=1095207&view=auto
==============================================================================
---
avro/trunk/lang/java/tools/src/test/java/org/apache/avro/tool/TestTextFileTools.java
(added)
+++
avro/trunk/lang/java/tools/src/test/java/org/apache/avro/tool/TestTextFileTools.java
Tue Apr 19 20:48:37 2011
@@ -0,0 +1,144 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro.tool;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.fail;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.OutputStream;
+import java.io.InputStream;
+import java.io.FileOutputStream;
+import java.io.FileInputStream;
+import java.io.BufferedOutputStream;
+import java.io.BufferedInputStream;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.avro.AvroTestUtil;
+import org.apache.avro.Schema;
+import org.apache.avro.Schema.Type;
+import org.apache.avro.file.DataFileReader;
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.generic.GenericDatumReader;
+import org.junit.BeforeClass;
+import org.junit.AfterClass;
+import org.junit.Test;
+
+@SuppressWarnings("deprecation")
+public class TestTextFileTools {
+ private static final int COUNT =
+ Integer.parseInt(System.getProperty("test.count", "10"));
+
+ static File linesFile;
+ static ByteBuffer[] lines;
+ static Schema schema;
+ static File schemaFile;
+
+ @BeforeClass
+ public static void writeRandomFile() throws IOException {
+ schema = Schema.create(Type.BYTES);
+ lines = new ByteBuffer[COUNT];
+ linesFile = AvroTestUtil.tempFile("random.lines");
+
+ OutputStream out =
+ new BufferedOutputStream(new FileOutputStream(linesFile));
+ Random rand = new Random();
+ for (int j = 0; j < COUNT; j++) {
+ byte[] line = new byte[rand.nextInt(512)];
+ System.out.println("Creating line = "+line.length);
+ for (int i = 0; i < line.length; i++) {
+ int b = rand.nextInt(256);
+ while (b == '\n' || b == '\r')
+ b = rand.nextInt(256);
+ line[i] = (byte)b;
+ }
+ out.write(line);
+ out.write('\n');
+ lines[j] = ByteBuffer.wrap(line);
+ }
+ out.close();
+ }
+
+ private void fromText(String name, String... args) throws Exception {
+ File avroFile = AvroTestUtil.tempFile(name + ".avro");
+
+ ArrayList<String> arglist = new ArrayList<String>();
+ arglist.addAll(Arrays.asList(args));
+ arglist.add(linesFile.toString());
+ arglist.add(avroFile.toString());
+
+ new FromTextTool().run(null, null, null, arglist);
+
+ // Read it back, and make sure it's valid.
+ DataFileReader<ByteBuffer> file = new DataFileReader<ByteBuffer>
+ (avroFile, new GenericDatumReader<ByteBuffer>());
+ int i = 0;
+ for (ByteBuffer line : file) {
+ System.out.println("Reading line = "+line.remaining());
+ assertEquals(line, lines[i]);
+ i++;
+ }
+ assertEquals(COUNT, i);
+ }
+
+ @Test
+ public void testFromText() throws Exception {
+ fromText("null", "--codec", "null");
+ fromText("deflate", "--codec", "deflate");
+ fromText("snappy", "--codec", "snappy");
+ }
+
+ @AfterClass
+ public static void testToText() throws Exception {
+ toText("null");
+ toText("deflate");
+ toText("snappy");
+ }
+
+ private static void toText(String name) throws Exception {
+ File avroFile = AvroTestUtil.tempFile(name + ".avro");
+ File outFile = AvroTestUtil.tempFile(name + ".lines");
+
+ ArrayList<String> arglist = new ArrayList<String>();
+ arglist.add(avroFile.toString());
+ arglist.add(outFile.toString());
+
+ new ToTextTool().run(null, null, null, arglist);
+
+ // Read it back, and make sure it's valid.
+ InputStream orig = new BufferedInputStream(new FileInputStream(linesFile));
+ InputStream after = new BufferedInputStream(new FileInputStream(outFile));
+
+ int b;
+ while ((b = orig.read()) != -1)
+ assertEquals(b, after.read());
+ assertEquals(-1, after.read());
+
+ orig.close();
+ after.close();
+ }
+
+}