Author: cutting
Date: Wed Dec 30 23:43:28 2009
New Revision: 894740
URL: http://svn.apache.org/viewvc?rev=894740&view=rev
Log:
Add four new avroj commands for converting data between JSON and binary
encodings.
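A rough usage sketch (hedged: 'avroj' below stands in for however the avroj jar is invoked locally, and the file and schema arguments are only illustrative; src/test/bin/test_avroj.sh shows the invocations actually exercised by the tests):
  avroj fromjson '"string"' records.json > records.avro   # JSON records -> Avro data file
  avroj tojson records.avro                                # data file -> JSON, one record per line
  avroj jsontofrag '"string"' datum.json > datum.bin       # JSON datum -> binary fragment
  avroj fragtojson '"string"' datum.bin                    # binary fragment -> JSON datum
  avroj getschema records.avro                             # prints a data file's schema (also registered here)
Except for tojson, which requires a seekable file, '-' can be given as the input file to read from stdin.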
Added:
hadoop/avro/trunk/src/java/org/apache/avro/tool/BinaryFragmentToJsonTool.java
hadoop/avro/trunk/src/java/org/apache/avro/tool/DataFileGetSchemaTool.java
hadoop/avro/trunk/src/java/org/apache/avro/tool/DataFileReadTool.java
hadoop/avro/trunk/src/java/org/apache/avro/tool/DataFileWriteTool.java
hadoop/avro/trunk/src/java/org/apache/avro/tool/JsonToBinaryFragmentTool.java
hadoop/avro/trunk/src/java/org/apache/avro/tool/Util.java
hadoop/avro/trunk/src/test/java/org/apache/avro/AvroTestUtil.java
hadoop/avro/trunk/src/test/java/org/apache/avro/tool/
hadoop/avro/trunk/src/test/java/org/apache/avro/tool/TestDataFileTools.java
hadoop/avro/trunk/src/test/java/org/apache/avro/tool/TestJsonToFromBinaryFragmentTools.java
Modified:
hadoop/avro/trunk/CHANGES.txt
hadoop/avro/trunk/src/java/org/apache/avro/io/JsonDecoder.java
hadoop/avro/trunk/src/java/org/apache/avro/tool/Main.java
hadoop/avro/trunk/src/test/bin/test_avroj.sh
Modified: hadoop/avro/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/CHANGES.txt?rev=894740&r1=894739&r2=894740&view=diff
==============================================================================
--- hadoop/avro/trunk/CHANGES.txt (original)
+++ hadoop/avro/trunk/CHANGES.txt Wed Dec 30 23:43:28 2009
@@ -29,6 +29,12 @@
AVRO-154. Add 'induce' sub-command to avroj command line tool.
(Philip Zeyliger via cutting)
+ AVRO-245. Add four new avroj commands:
+ - fromjson Reads JSON records and writes to an Avro data file.
+ - tojson Dumps an Avro data file as JSON, one record per line.
+ - fragtojson Renders a binary-encoded Avro datum as JSON.
+ - jsontofrag Renders a JSON-encoded Avro datum as binary.
+
IMPROVEMENTS
AVRO-157. Changes from code review comments for C++. (sbanacho)
Modified: hadoop/avro/trunk/src/java/org/apache/avro/io/JsonDecoder.java
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/java/org/apache/avro/io/JsonDecoder.java?rev=894740&r1=894739&r2=894740&view=diff
==============================================================================
--- hadoop/avro/trunk/src/java/org/apache/avro/io/JsonDecoder.java (original)
+++ hadoop/avro/trunk/src/java/org/apache/avro/io/JsonDecoder.java Wed Dec 30 23:43:28 2009
@@ -18,6 +18,7 @@
package org.apache.avro.io;
import java.io.IOException;
+import java.io.EOFException;
import java.io.InputStream;
import java.nio.ByteBuffer;
@@ -47,6 +48,12 @@
this(new JsonGrammarGenerator().generate(schema), in);
}
+ private void advance(Symbol symbol) throws IOException {
+ if (in.getCurrentToken() == null && this.parser.depth() == 1)
+ throw new EOFException();
+ parser.advance(symbol);
+ }
+
@Override
public void init(InputStream in) throws IOException {
parser.reset();
@@ -56,7 +63,7 @@
@Override
public void readNull() throws IOException {
- parser.advance(Symbol.NULL);
+ advance(Symbol.NULL);
if (in.getCurrentToken() == JsonToken.VALUE_NULL) {
in.nextToken();
} else {
@@ -66,7 +73,7 @@
@Override
public boolean readBoolean() throws IOException {
- parser.advance(Symbol.BOOLEAN);
+ advance(Symbol.BOOLEAN);
JsonToken t = in.getCurrentToken();
if (t == JsonToken.VALUE_TRUE || t == JsonToken.VALUE_FALSE) {
in.nextToken();
@@ -78,7 +85,7 @@
@Override
public int readInt() throws IOException {
- parser.advance(Symbol.INT);
+ advance(Symbol.INT);
if (in.getCurrentToken() == JsonToken.VALUE_NUMBER_INT) {
int result = in.getIntValue();
in.nextToken();
@@ -90,7 +97,7 @@
@Override
public long readLong() throws IOException {
- parser.advance(Symbol.LONG);
+ advance(Symbol.LONG);
if (in.getCurrentToken() == JsonToken.VALUE_NUMBER_INT) {
long result = in.getLongValue();
in.nextToken();
@@ -102,7 +109,7 @@
@Override
public float readFloat() throws IOException {
- parser.advance(Symbol.FLOAT);
+ advance(Symbol.FLOAT);
if (in.getCurrentToken() == JsonToken.VALUE_NUMBER_FLOAT) {
float result = in.getFloatValue();
in.nextToken();
@@ -114,7 +121,7 @@
@Override
public double readDouble() throws IOException {
- parser.advance(Symbol.DOUBLE);
+ advance(Symbol.DOUBLE);
if (in.getCurrentToken() == JsonToken.VALUE_NUMBER_FLOAT) {
double result = in.getDoubleValue();
in.nextToken();
@@ -126,7 +133,7 @@
@Override
public Utf8 readString(Utf8 old) throws IOException {
- parser.advance(Symbol.STRING);
+ advance(Symbol.STRING);
if (parser.topSymbol() == Symbol.MAP_KEY_MARKER) {
parser.advance(Symbol.MAP_KEY_MARKER);
if (in.getCurrentToken() != JsonToken.FIELD_NAME) {
@@ -144,7 +151,7 @@
@Override
public void skipString() throws IOException {
- parser.advance(Symbol.STRING);
+ advance(Symbol.STRING);
if (parser.topSymbol() == Symbol.MAP_KEY_MARKER) {
parser.advance(Symbol.MAP_KEY_MARKER);
if (in.getCurrentToken() != JsonToken.FIELD_NAME) {
@@ -160,7 +167,7 @@
@Override
public ByteBuffer readBytes(ByteBuffer old) throws IOException {
- parser.advance(Symbol.BYTES);
+ advance(Symbol.BYTES);
if (in.getCurrentToken() == JsonToken.VALUE_STRING) {
byte[] result = readByteArray();
in.nextToken();
@@ -177,7 +184,7 @@
@Override
public void skipBytes() throws IOException {
- parser.advance(Symbol.BYTES);
+ advance(Symbol.BYTES);
if (in.getCurrentToken() == JsonToken.VALUE_STRING) {
in.nextToken();
} else {
@@ -186,7 +193,7 @@
}
private void checkFixed(int size) throws IOException {
- parser.advance(Symbol.FIXED);
+ advance(Symbol.FIXED);
Symbol.IntCheckAction top = (Symbol.IntCheckAction) parser.popSymbol();
if (size != top.size) {
throw new AvroTypeException(
@@ -232,14 +239,14 @@
@Override
protected void skipFixed() throws IOException {
- parser.advance(Symbol.FIXED);
+ advance(Symbol.FIXED);
Symbol.IntCheckAction top = (Symbol.IntCheckAction) parser.popSymbol();
doSkipFixed(top.size);
}
@Override
public int readEnum() throws IOException {
- parser.advance(Symbol.ENUM);
+ advance(Symbol.ENUM);
Symbol.EnumLabelsAction top = (Symbol.EnumLabelsAction) parser.popSymbol();
if (in.getCurrentToken() == JsonToken.VALUE_STRING) {
in.getText();
@@ -256,7 +263,7 @@
@Override
public long readArrayStart() throws IOException {
- parser.advance(Symbol.ARRAY_START);
+ advance(Symbol.ARRAY_START);
if (in.getCurrentToken() == JsonToken.START_ARRAY) {
in.nextToken();
return doArrayNext();
@@ -267,7 +274,7 @@
@Override
public long arrayNext() throws IOException {
- parser.advance(Symbol.ITEM_END);
+ advance(Symbol.ITEM_END);
return doArrayNext();
}
@@ -283,11 +290,11 @@
@Override
public long skipArray() throws IOException {
- parser.advance(Symbol.ARRAY_START);
+ advance(Symbol.ARRAY_START);
if (in.getCurrentToken() == JsonToken.START_ARRAY) {
in.skipChildren();
in.nextToken();
- parser.advance(Symbol.ARRAY_END);
+ advance(Symbol.ARRAY_END);
} else {
throw error("array-start");
}
@@ -296,7 +303,7 @@
@Override
public long readMapStart() throws IOException {
- parser.advance(Symbol.MAP_START);
+ advance(Symbol.MAP_START);
if (in.getCurrentToken() == JsonToken.START_OBJECT) {
in.nextToken();
return doMapNext();
@@ -307,14 +314,14 @@
@Override
public long mapNext() throws IOException {
- parser.advance(Symbol.ITEM_END);
+ advance(Symbol.ITEM_END);
return doMapNext();
}
private long doMapNext() throws IOException {
if (in.getCurrentToken() == JsonToken.END_OBJECT) {
in.nextToken();
- parser.advance(Symbol.MAP_END);
+ advance(Symbol.MAP_END);
return 0;
} else {
return 1;
@@ -323,11 +330,11 @@
@Override
public long skipMap() throws IOException {
- parser.advance(Symbol.MAP_START);
+ advance(Symbol.MAP_START);
if (in.getCurrentToken() == JsonToken.START_OBJECT) {
in.skipChildren();
in.nextToken();
- parser.advance(Symbol.MAP_END);
+ advance(Symbol.MAP_END);
} else {
throw error("map-start");
}
@@ -336,7 +343,7 @@
@Override
public int readIndex() throws IOException {
- parser.advance(Symbol.UNION);
+ advance(Symbol.UNION);
Symbol.Alternative a = (Symbol.Alternative) parser.popSymbol();
String label;
Added: hadoop/avro/trunk/src/java/org/apache/avro/tool/BinaryFragmentToJsonTool.java
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/java/org/apache/avro/tool/BinaryFragmentToJsonTool.java?rev=894740&view=auto
==============================================================================
--- hadoop/avro/trunk/src/java/org/apache/avro/tool/BinaryFragmentToJsonTool.java (added)
+++ hadoop/avro/trunk/src/java/org/apache/avro/tool/BinaryFragmentToJsonTool.java Wed Dec 30 23:43:28 2009
@@ -0,0 +1,84 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro.tool;
+
+import java.io.FileInputStream;
+import java.io.InputStream;
+import java.io.PrintStream;
+import java.util.List;
+
+import org.apache.avro.Schema;
+import org.apache.avro.io.DatumReader;
+import org.apache.avro.io.DatumWriter;
+import org.apache.avro.generic.GenericDatumReader;
+import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.avro.io.BinaryDecoder;
+import org.apache.avro.io.JsonEncoder;
+import org.codehaus.jackson.JsonEncoding;
+import org.codehaus.jackson.JsonFactory;
+import org.codehaus.jackson.JsonGenerator;
+
+/** Converts an input file from Avro binary into JSON. */
+public class BinaryFragmentToJsonTool implements Tool {
+ @Override
+ public int run(InputStream stdin, PrintStream out, PrintStream err,
+ List<String> args) throws Exception {
+ if (args.size() != 2) {
+ err.println("Expected 1 arguments: schema binary_data_file");
+ err.println("Use '-' as binary_data_file for stdin.");
+ return 1;
+ }
+ Schema schema = Schema.parse(args.get(0));
+ InputStream input;
+ boolean needsClosing;
+ if (args.get(1).equals("-")) {
+ input = stdin;
+ needsClosing = false;
+ } else {
+ input = new FileInputStream(args.get(1));
+ needsClosing = true;
+ }
+ try {
+ DatumReader<Object> reader = new GenericDatumReader<Object>(schema);
+ Object datum = reader.read(null, new BinaryDecoder(input));
+ DatumWriter<Object> writer = new GenericDatumWriter<Object>(schema);
+ JsonGenerator g =
+ new JsonFactory().createJsonGenerator(out, JsonEncoding.UTF8);
+ g.useDefaultPrettyPrinter();
+ writer.write(datum, new JsonEncoder(schema, g));
+ g.flush();
+ out.println();
+ out.flush();
+ } finally {
+ if (needsClosing) {
+ input.close();
+ }
+ }
+ return 0;
+ }
+
+ @Override
+ public String getName() {
+ return "fragtojson";
+ }
+
+ @Override
+ public String getShortDescription() {
+ return "Renders a binary-encoded Avro datum as JSON.";
+ }
+}
Added: hadoop/avro/trunk/src/java/org/apache/avro/tool/DataFileGetSchemaTool.java
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/java/org/apache/avro/tool/DataFileGetSchemaTool.java?rev=894740&view=auto
==============================================================================
--- hadoop/avro/trunk/src/java/org/apache/avro/tool/DataFileGetSchemaTool.java (added)
+++ hadoop/avro/trunk/src/java/org/apache/avro/tool/DataFileGetSchemaTool.java Wed Dec 30 23:43:28 2009
@@ -0,0 +1,54 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro.tool;
+
+import java.io.File;
+import java.io.InputStream;
+import java.io.PrintStream;
+import java.util.List;
+
+import org.apache.avro.file.DataFileReader;
+import org.apache.avro.generic.GenericDatumReader;
+
+/** Reads a data file to get its schema. */
+public class DataFileGetSchemaTool implements Tool {
+
+ @Override
+ public String getName() {
+ return "getschema";
+ }
+
+ @Override
+ public String getShortDescription() {
+ return "Prints out schema of an Avro data file.";
+ }
+
+ @Override
+ public int run(InputStream stdin, PrintStream out, PrintStream err,
+ List<String> args) throws Exception {
+ if (args.size() != 1) {
+ err.println("Expected 1 argument: input_file");
+ return 1;
+ }
+ DataFileReader<Void> reader =
+ new DataFileReader<Void>(new File(args.get(0)),
+ new GenericDatumReader<Void>());
+ out.println(reader.getSchema().toString(true));
+ return 0;
+ }
+}
Added: hadoop/avro/trunk/src/java/org/apache/avro/tool/DataFileReadTool.java
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/java/org/apache/avro/tool/DataFileReadTool.java?rev=894740&view=auto
==============================================================================
--- hadoop/avro/trunk/src/java/org/apache/avro/tool/DataFileReadTool.java (added)
+++ hadoop/avro/trunk/src/java/org/apache/avro/tool/DataFileReadTool.java Wed Dec 30 23:43:28 2009
@@ -0,0 +1,79 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro.tool;
+
+import java.io.File;
+import java.io.InputStream;
+import java.io.PrintStream;
+import java.util.List;
+
+import org.apache.avro.Schema;
+import org.apache.avro.file.DataFileReader;
+import org.apache.avro.io.DatumWriter;
+import org.apache.avro.generic.GenericDatumReader;
+import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.avro.io.Encoder;
+import org.apache.avro.io.JsonEncoder;
+import org.codehaus.jackson.JsonGenerator;
+
+/** Reads an Avro data file and dumps it as JSON. */
+public class DataFileReadTool implements Tool {
+
+ @Override
+ public String getName() {
+ return "tojson";
+ }
+
+ @Override
+ public String getShortDescription() {
+ return "Dumps an Avro data file as JSON, one record per line.";
+ }
+
+ @Override
+ public int run(InputStream stdin, PrintStream out, PrintStream err,
+ List<String> args) throws Exception {
+ if (args.size() != 1) {
+ // Unlike other commands, "-" can't be used for stdin, because
+ // we can only use seekable files.
+ err.println("Expected 1 argument: input_file.");
+ return 1;
+ }
+
+ GenericDatumReader<Object> reader = new GenericDatumReader<Object>();
+ DataFileReader<Object> fileReader =
+ new DataFileReader<Object>(new File(args.get(0)), reader);
+ try {
+ Schema schema = fileReader.getSchema();
+ DatumWriter<Object> writer = new GenericDatumWriter<Object>(schema);
+ Encoder encoder = new JsonEncoder(schema, (JsonGenerator)null);
+ Object datum;
+ while (null != (datum = fileReader.next(null))) {
+ // init() recreates the internal Jackson JsonGenerator
+ encoder.init(out);
+ writer.write(datum, encoder);
+ encoder.flush();
+ out.println();
+ }
+ out.flush();
+ } finally {
+ fileReader.close();
+ }
+ return 0;
+ }
+
+}
Added: hadoop/avro/trunk/src/java/org/apache/avro/tool/DataFileWriteTool.java
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/java/org/apache/avro/tool/DataFileWriteTool.java?rev=894740&view=auto
==============================================================================
--- hadoop/avro/trunk/src/java/org/apache/avro/tool/DataFileWriteTool.java (added)
+++ hadoop/avro/trunk/src/java/org/apache/avro/tool/DataFileWriteTool.java Wed Dec 30 23:43:28 2009
@@ -0,0 +1,82 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro.tool;
+
+import java.io.EOFException;
+import java.io.InputStream;
+import java.io.DataInputStream;
+import java.io.PrintStream;
+import java.util.List;
+
+import org.apache.avro.Schema;
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.io.DatumReader;
+import org.apache.avro.generic.GenericDatumReader;
+import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.avro.io.Decoder;
+import org.apache.avro.io.JsonDecoder;
+
+/** Reads newline-delimited JSON records and writes an Avro data file. */
+public class DataFileWriteTool implements Tool {
+
+ @Override
+ public String getName() {
+ return "fromjson";
+ }
+
+ @Override
+ public String getShortDescription() {
+ return "Reads JSON records and writes an Avro data file.";
+ }
+
+ @Override
+ public int run(InputStream stdin, PrintStream out, PrintStream err,
+ List<String> args) throws Exception {
+ if (args.size() != 2) {
+ err.println("Expected 2 args: schema input_file");
+ return 1;
+ }
+
+ Schema schema = Schema.parse(args.get(0));
+ DatumReader<Object> reader = new GenericDatumReader<Object>(schema);
+
+ InputStream input = Util.fileOrStdin(args.get(1), stdin);
+ try {
+ DataInputStream din = new DataInputStream(input);
+ DataFileWriter<Object> writer =
+ new DataFileWriter<Object>(schema, out,
+ new GenericDatumWriter<Object>());
+ Decoder decoder = new JsonDecoder(schema, din);
+ Object datum;
+ while (true) {
+ try {
+ datum = reader.read(null, decoder);
+ } catch (EOFException e) {
+ break;
+ }
+ writer.append(datum);
+ }
+ writer.close();
+ } finally {
+ if (input != stdin) {
+ input.close();
+ }
+ }
+ return 0;
+ }
+}
Added: hadoop/avro/trunk/src/java/org/apache/avro/tool/JsonToBinaryFragmentTool.java
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/java/org/apache/avro/tool/JsonToBinaryFragmentTool.java?rev=894740&view=auto
==============================================================================
--- hadoop/avro/trunk/src/java/org/apache/avro/tool/JsonToBinaryFragmentTool.java (added)
+++ hadoop/avro/trunk/src/java/org/apache/avro/tool/JsonToBinaryFragmentTool.java Wed Dec 30 23:43:28 2009
@@ -0,0 +1,77 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro.tool;
+
+import java.io.FileInputStream;
+import java.io.InputStream;
+import java.io.PrintStream;
+import java.util.List;
+
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericDatumReader;
+import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.avro.io.BinaryEncoder;
+import org.apache.avro.io.JsonDecoder;
+
+/** Tool to convert JSON data into binary form. */
+public class JsonToBinaryFragmentTool implements Tool {
+ @Override
+ public int run(InputStream stdin, PrintStream out, PrintStream err,
+ List<String> args) throws Exception {
+ if (args.size() != 2) {
+ err.println("Expected 2 arguments: schema json_data_file");
+ err.println("Use '-' as json_data_file for stdin.");
+ return 1;
+ }
+ Schema schema = Schema.parse(args.get(0));
+ InputStream input;
+ boolean needsClosing;
+ if (args.get(1).equals("-")) {
+ input = stdin;
+ needsClosing = false;
+ } else {
+ input = new FileInputStream(args.get(1));
+ needsClosing = true;
+ }
+
+ try {
+ GenericDatumReader<Object> reader =
+ new GenericDatumReader<Object>(schema);
+ Object datum = reader.read(null, new JsonDecoder(schema, input));
+
+ GenericDatumWriter<Object> writer =
+ new GenericDatumWriter<Object>(schema);
+ writer.write(datum, new BinaryEncoder(out));
+ } finally {
+ if (needsClosing) {
+ input.close();
+ }
+ }
+ return 0;
+ }
+
+ @Override
+ public String getName() {
+ return "jsontofrag";
+ }
+
+ @Override
+ public String getShortDescription() {
+ return "Renders a JSON-encoded Avro datum as binary.";
+ }
+}
Modified: hadoop/avro/trunk/src/java/org/apache/avro/tool/Main.java
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/java/org/apache/avro/tool/Main.java?rev=894740&r1=894739&r2=894740&view=diff
==============================================================================
--- hadoop/avro/trunk/src/java/org/apache/avro/tool/Main.java (original)
+++ hadoop/avro/trunk/src/java/org/apache/avro/tool/Main.java Wed Dec 30 23:43:28 2009
@@ -37,7 +37,12 @@
tools = new TreeMap<String, Tool>();
for (Tool tool : new Tool[] {
new SpecificCompilerTool(),
- new InduceSchemaTool()
+ new InduceSchemaTool(),
+ new JsonToBinaryFragmentTool(),
+ new BinaryFragmentToJsonTool(),
+ new DataFileReadTool(),
+ new DataFileWriteTool(),
+ new DataFileGetSchemaTool(),
}) {
Tool prev = tools.put(tool.getName(), tool);
if (prev != null) {
Added: hadoop/avro/trunk/src/java/org/apache/avro/tool/Util.java
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/java/org/apache/avro/tool/Util.java?rev=894740&view=auto
==============================================================================
--- hadoop/avro/trunk/src/java/org/apache/avro/tool/Util.java (added)
+++ hadoop/avro/trunk/src/java/org/apache/avro/tool/Util.java Wed Dec 30 23:43:28 2009
@@ -0,0 +1,39 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro.tool;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.InputStream;
+
+/** Static utility methods for tools. */
+class Util {
+ /**
+ * Returns stdin if filename is "-", else opens the file
+ * and returns an InputStream for it.
+ */
+ static InputStream fileOrStdin(String filename, InputStream stdin)
+ throws FileNotFoundException {
+ if (filename.equals("-")) {
+ return stdin;
+ } else {
+ return new FileInputStream(new File(filename));
+ }
+ }
+}
Modified: hadoop/avro/trunk/src/test/bin/test_avroj.sh
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/test/bin/test_avroj.sh?rev=894740&r1=894739&r2=894740&view=diff
==============================================================================
--- hadoop/avro/trunk/src/test/bin/test_avroj.sh (original)
+++ hadoop/avro/trunk/src/test/bin/test_avroj.sh Wed Dec 30 23:43:28 2009
@@ -62,6 +62,25 @@
$CMD induce build/test/classes 'org.apache.avro.TestReflect$C' \
| tr -d '\n ' | grep -q -F '{"protocol":"C"'
######################################################################
+# Test to/from avro (both fragments and data files)
+$CMD jsontofrag '"string"' <(echo '"Long string implies readable length encoding."') \
+ | cmp -s - <(echo -n 'ZLong string implies readable length encoding.')
+$CMD fragtojson '"string"' <(printf \\006foo) \
+ | cmp -s - <(echo '"foo"')
+# And test that stdin support (via "-") works too
+echo '"The identity function"' \
+ | $CMD jsontofrag '"string"' - \
+ | $CMD fragtojson '"string"' - \
+ | cmp -s - <(echo '"The identity function"')
+
+$CMD fromjson '"string"' <(echo '"foo"'; echo '"bar"') \
+ > $TMPDIR/data_file_write.avro
+$CMD tojson $TMPDIR/data_file_write.avro \
+ | cmp -s - <(echo '"foo"'; echo '"bar"')
+$CMD getschema $TMPDIR/data_file_write.avro \
+ | cmp -s - <(echo '"string"')
+######################################################################
+
$CMD 2>&1 | grep -q "Available tools:"
$CMD doesnotexist 2>&1 | grep -q "Available tools:"
! $CMD 2>&1 > /dev/null
Added: hadoop/avro/trunk/src/test/java/org/apache/avro/AvroTestUtil.java
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/test/java/org/apache/avro/AvroTestUtil.java?rev=894740&view=auto
==============================================================================
--- hadoop/avro/trunk/src/test/java/org/apache/avro/AvroTestUtil.java (added)
+++ hadoop/avro/trunk/src/test/java/org/apache/avro/AvroTestUtil.java Wed Dec 30 23:43:28 2009
@@ -0,0 +1,32 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro;
+
+import java.io.File;
+
+/** Utilities for Avro tests. */
+public class AvroTestUtil {
+ static final File TMPDIR = new File(System.getProperty("test.dir", "/tmp"));
+
+ private AvroTestUtil() { }
+
+ /** Create a temporary file in a test-appropriate directory. */
+ public static File tempFile(String name) {
+ return new File(TMPDIR, name);
+ }
+}
Added: hadoop/avro/trunk/src/test/java/org/apache/avro/tool/TestDataFileTools.java
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/test/java/org/apache/avro/tool/TestDataFileTools.java?rev=894740&view=auto
==============================================================================
--- hadoop/avro/trunk/src/test/java/org/apache/avro/tool/TestDataFileTools.java (added)
+++ hadoop/avro/trunk/src/test/java/org/apache/avro/tool/TestDataFileTools.java Wed Dec 30 23:43:28 2009
@@ -0,0 +1,181 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro.tool;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.fail;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.Arrays;
+
+import org.apache.avro.AvroTestUtil;
+import org.apache.avro.Schema;
+import org.apache.avro.Schema.Type;
+import org.apache.avro.file.DataFileReader;
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.generic.GenericDatumReader;
+import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.tools.ant.filters.StringInputStream;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class TestDataFileTools {
+ static final int COUNT = 10;
+ static File sampleFile;
+ static String jsonData;
+ static Schema schema;
+
+ @BeforeClass
+ public static void writeSampleFile() throws IOException {
+ sampleFile = AvroTestUtil.tempFile(TestDataFileTools.class + ".avro");
+ schema = Schema.create(Type.INT);
+
+ DataFileWriter<Object> writer = new DataFileWriter<Object>(
+ schema, sampleFile, new GenericDatumWriter<Object>(schema));
+ StringBuilder builder = new StringBuilder();
+
+ for (int i = 0; i < COUNT; ++i) {
+ builder.append(Integer.toString(i));
+ builder.append("\n");
+ writer.append(i);
+ }
+
+ writer.flush();
+ writer.close();
+
+ jsonData = builder.toString();
+ }
+
+ @Test
+ public void testRead() throws Exception {
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ PrintStream p = new PrintStream(baos);
+ new DataFileReadTool().run(
+ null, // stdin
+ p, // stdout
+ null, // stderr
+ Arrays.asList(sampleFile.getPath()));
+ assertEquals(jsonData.toString(), baos.toString("UTF-8"));
+ }
+
+ @Test
+ public void testGetSchema() throws Exception {
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ PrintStream p = new PrintStream(baos);
+ new DataFileGetSchemaTool().run(
+ null, // stdin
+ p, // stdout
+ null, // stderr
+ Arrays.asList(sampleFile.getPath()));
+ assertEquals(schema.toString() + "\n", baos.toString("UTF-8"));
+ }
+
+ @Test
+ public void testWrite() throws Exception {
+ File outFile = AvroTestUtil.tempFile(
+ TestDataFileTools.class + ".testWrite." + ".avro");
+ FileOutputStream fout = new FileOutputStream(outFile);
+ PrintStream out = new PrintStream(fout);
+ new DataFileWriteTool().run(
+ new StringInputStream(jsonData),
+ new PrintStream(out), // stdout
+ null, // stderr
+ Arrays.asList(schema.toString(), "-"));
+ out.close();
+ fout.close();
+
+ // Read it back, and make sure it's valid.
+ GenericDatumReader<Object> reader = new GenericDatumReader<Object>();
+ DataFileReader<Object> fileReader = new
DataFileReader<Object>(outFile,reader);
+ Object datum;
+ int i = 0;
+ while (null != (datum = fileReader.next(null))) {
+ assertEquals(i, datum);
+ i++;
+ }
+ assertEquals(COUNT, i);
+ assertEquals(schema, fileReader.getSchema());
+ assertEquals(COUNT, fileReader.getCount());
+ }
+
+ @Test
+ public void testFailureOnWritingPartialJSONValues() throws Exception {
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ PrintStream out = new PrintStream(baos);
+ try {
+ new DataFileWriteTool().run(
+ new StringInputStream("{"),
+ new PrintStream(out), // stdout
+ null, // stderr
+ Arrays.asList("{ \"type\":\"record\", \"fields\":" +
+ "[{\"name\":\"foo\", \"type\":\"string\"}], " +
+ "\"name\":\"boring\" }", "-"));
+ fail("Expected exception.");
+ } catch (IOException expected) {
+ // expected
+ }
+ }
+
+ @Test
+ public void testWritingZeroJsonValues() throws Exception {
+ File outFile = writeToAvroFile("zerojsonvalues",
+ schema.toString(),
+ "");
+ assertEquals(0, countRecords(outFile));
+ }
+
+ private int countRecords(File outFile) throws IOException {
+ GenericDatumReader<Object> reader = new GenericDatumReader<Object>();
+ DataFileReader<Object> fileReader =
+ new DataFileReader<Object>(outFile,reader);
+ int i = 0;
+ while (null != fileReader.next(null)) {
+ i++;
+ }
+ return i;
+ }
+
+ @Test
+ public void testDifferentSeparatorsBetweenJsonRecords() throws Exception {
+ File outFile = writeToAvroFile(
+ "seperators",
+ "{ \"type\":\"array\", \"items\":\"int\" }",
+ "[] [] []\n[][3] ");
+ assertEquals(5, countRecords(outFile));
+ }
+
+ public File writeToAvroFile(String testName, String schema, String json) throws Exception {
+ File outFile = AvroTestUtil.tempFile(
+ TestDataFileTools.class + "." + testName + ".avro");
+ FileOutputStream fout = new FileOutputStream(outFile);
+ PrintStream out = new PrintStream(fout);
+ new DataFileWriteTool().run(
+ new StringInputStream(json),
+ new PrintStream(out), // stdout
+ null, // stderr
+ Arrays.asList(schema, "-"));
+ out.close();
+ fout.close();
+ return outFile;
+ }
+
+}
Added: hadoop/avro/trunk/src/test/java/org/apache/avro/tool/TestJsonToFromBinaryFragmentTools.java
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/test/java/org/apache/avro/tool/TestJsonToFromBinaryFragmentTools.java?rev=894740&view=auto
==============================================================================
--- hadoop/avro/trunk/src/test/java/org/apache/avro/tool/TestJsonToFromBinaryFragmentTools.java (added)
+++ hadoop/avro/trunk/src/test/java/org/apache/avro/tool/TestJsonToFromBinaryFragmentTools.java Wed Dec 30 23:43:28 2009
@@ -0,0 +1,66 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro.tool;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.PrintStream;
+import java.util.Arrays;
+
+import org.apache.avro.Schema;
+import org.junit.Test;
+
+/**
+ * Tests both {@link JsonToBinaryFragmentTool}
+ * and {@link BinaryFragmentToJsonTool}.
+ */
+public class TestJsonToFromBinaryFragmentTools {
+ private static final Schema STRING_SCHEMA = Schema.parse("\"string\"");
+ private static final String UTF8 = "utf-8";
+ private static final String AVRO =
+ "ZLong string implies readable length encoding.";
+ private static final String JSON =
+ "\"Long string implies readable length encoding.\"";
+
+ @Test
+ public void testBinaryToJson() throws Exception {
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ PrintStream p = new PrintStream(baos);
+
+ new BinaryFragmentToJsonTool().run(
+ new ByteArrayInputStream(AVRO.getBytes(UTF8)), // stdin
+ p, // stdout
+ null, // stderr
+ Arrays.asList(STRING_SCHEMA.toString(), "-"));
+ assertEquals(JSON + "\n", baos.toString(UTF8));
+ }
+
+ @Test
+ public void testJsonToBinary() throws Exception {
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ PrintStream p = new PrintStream(baos);
+ new JsonToBinaryFragmentTool().run(
+ new ByteArrayInputStream(JSON.getBytes(UTF8)), // stdin
+ p, // stdout
+ null, // stderr
+ Arrays.asList(STRING_SCHEMA.toString(), "-"));
+ assertEquals(AVRO, baos.toString(UTF8));
+ }
+}