Author: cutting
Date: Wed Dec 30 23:43:28 2009
New Revision: 894740

URL: http://svn.apache.org/viewvc?rev=894740&view=rev
Log:
Add four new avroj commands for converting data between JSON and binary 
encodings.
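
The new sub-commands are dispatched through the avroj Main driver, but each one is also a plain Tool whose run(stdin, stdout, stderr, args) method can be invoked directly, exactly as the new unit tests below do. A minimal sketch of that calling convention, assuming only that an Avro data file already exists at the (hypothetical) path used here:

    import java.util.Arrays;

    import org.apache.avro.tool.DataFileGetSchemaTool;
    import org.apache.avro.tool.DataFileReadTool;

    public class ToolUsageSketch {
      public static void main(String[] argv) throws Exception {
        String dataFile = "/tmp/example.avro";   // hypothetical input file

        // "getschema": print the schema stored in the data file.
        new DataFileGetSchemaTool().run(
            null,            // stdin is unused by this tool
            System.out,      // stdout
            System.err,      // stderr
            Arrays.asList(dataFile));

        // "tojson": dump the file's records as JSON, one per line.
        new DataFileReadTool().run(
            null,            // stdin is unused by this tool
            System.out,      // stdout
            System.err,      // stderr
            Arrays.asList(dataFile));
      }
    }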

Added:
    hadoop/avro/trunk/src/java/org/apache/avro/tool/BinaryFragmentToJsonTool.java
    hadoop/avro/trunk/src/java/org/apache/avro/tool/DataFileGetSchemaTool.java
    hadoop/avro/trunk/src/java/org/apache/avro/tool/DataFileReadTool.java
    hadoop/avro/trunk/src/java/org/apache/avro/tool/DataFileWriteTool.java
    hadoop/avro/trunk/src/java/org/apache/avro/tool/JsonToBinaryFragmentTool.java
    hadoop/avro/trunk/src/java/org/apache/avro/tool/Util.java
    hadoop/avro/trunk/src/test/java/org/apache/avro/AvroTestUtil.java
    hadoop/avro/trunk/src/test/java/org/apache/avro/tool/
    hadoop/avro/trunk/src/test/java/org/apache/avro/tool/TestDataFileTools.java
    hadoop/avro/trunk/src/test/java/org/apache/avro/tool/TestJsonToFromBinaryFragmentTools.java
Modified:
    hadoop/avro/trunk/CHANGES.txt
    hadoop/avro/trunk/src/java/org/apache/avro/io/JsonDecoder.java
    hadoop/avro/trunk/src/java/org/apache/avro/tool/Main.java
    hadoop/avro/trunk/src/test/bin/test_avroj.sh

Modified: hadoop/avro/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/CHANGES.txt?rev=894740&r1=894739&r2=894740&view=diff
==============================================================================
--- hadoop/avro/trunk/CHANGES.txt (original)
+++ hadoop/avro/trunk/CHANGES.txt Wed Dec 30 23:43:28 2009
@@ -29,6 +29,12 @@
     AVRO-154. Add 'induce' sub-command to avroj command line tool.
     (Philip Zeyliger via cutting)
 
+    AVRO-245. Add four new avroj commands:
+      - fromjson Reads JSON records and writes to an Avro data file.
+      - tojson  Dumps an Avro data file as JSON, one record per line.
+      - fragtojson Renders a binary-encoded Avro datum as JSON.
+      - jsontofrag Renders a JSON-encoded Avro datum as binary.
+
   IMPROVEMENTS
 
     AVRO-157. Changes from code review comments for C++. (sbanacho)

Modified: hadoop/avro/trunk/src/java/org/apache/avro/io/JsonDecoder.java
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/java/org/apache/avro/io/JsonDecoder.java?rev=894740&r1=894739&r2=894740&view=diff
==============================================================================
--- hadoop/avro/trunk/src/java/org/apache/avro/io/JsonDecoder.java (original)
+++ hadoop/avro/trunk/src/java/org/apache/avro/io/JsonDecoder.java Wed Dec 30 23:43:28 2009
@@ -18,6 +18,7 @@
 package org.apache.avro.io;
 
 import java.io.IOException;
+import java.io.EOFException;
 import java.io.InputStream;
 import java.nio.ByteBuffer;
 
@@ -47,6 +48,12 @@
     this(new JsonGrammarGenerator().generate(schema), in);
   }
 
+  private void advance(Symbol symbol) throws IOException {
+    if (in.getCurrentToken() == null && this.parser.depth() == 1)
+      throw new EOFException();
+    parser.advance(symbol);
+  }
+
   @Override
   public void init(InputStream in) throws IOException {
     parser.reset();
@@ -56,7 +63,7 @@
 
   @Override
   public void readNull() throws IOException {
-    parser.advance(Symbol.NULL);
+    advance(Symbol.NULL);
     if (in.getCurrentToken() == JsonToken.VALUE_NULL) {
       in.nextToken();
     } else {
@@ -66,7 +73,7 @@
 
   @Override
   public boolean readBoolean() throws IOException {
-    parser.advance(Symbol.BOOLEAN);
+    advance(Symbol.BOOLEAN);
     JsonToken t = in.getCurrentToken(); 
     if (t == JsonToken.VALUE_TRUE || t == JsonToken.VALUE_FALSE) {
       in.nextToken();
@@ -78,7 +85,7 @@
 
   @Override
   public int readInt() throws IOException {
-    parser.advance(Symbol.INT);
+    advance(Symbol.INT);
     if (in.getCurrentToken() == JsonToken.VALUE_NUMBER_INT) {
       int result = in.getIntValue();
       in.nextToken();
@@ -90,7 +97,7 @@
     
   @Override
   public long readLong() throws IOException {
-    parser.advance(Symbol.LONG);
+    advance(Symbol.LONG);
     if (in.getCurrentToken() == JsonToken.VALUE_NUMBER_INT) {
       long result = in.getLongValue();
       in.nextToken();
@@ -102,7 +109,7 @@
 
   @Override
   public float readFloat() throws IOException {
-    parser.advance(Symbol.FLOAT);
+    advance(Symbol.FLOAT);
     if (in.getCurrentToken() == JsonToken.VALUE_NUMBER_FLOAT) {
       float result = in.getFloatValue();
       in.nextToken();
@@ -114,7 +121,7 @@
 
   @Override
   public double readDouble() throws IOException {
-    parser.advance(Symbol.DOUBLE);
+    advance(Symbol.DOUBLE);
     if (in.getCurrentToken() == JsonToken.VALUE_NUMBER_FLOAT) {
       double result = in.getDoubleValue();
       in.nextToken();
@@ -126,7 +133,7 @@
     
   @Override
   public Utf8 readString(Utf8 old) throws IOException {
-    parser.advance(Symbol.STRING);
+    advance(Symbol.STRING);
     if (parser.topSymbol() == Symbol.MAP_KEY_MARKER) {
       parser.advance(Symbol.MAP_KEY_MARKER);
       if (in.getCurrentToken() != JsonToken.FIELD_NAME) {
@@ -144,7 +151,7 @@
 
   @Override
   public void skipString() throws IOException {
-    parser.advance(Symbol.STRING);
+    advance(Symbol.STRING);
     if (parser.topSymbol() == Symbol.MAP_KEY_MARKER) {
       parser.advance(Symbol.MAP_KEY_MARKER);
       if (in.getCurrentToken() != JsonToken.FIELD_NAME) {
@@ -160,7 +167,7 @@
 
   @Override
   public ByteBuffer readBytes(ByteBuffer old) throws IOException {
-    parser.advance(Symbol.BYTES);
+    advance(Symbol.BYTES);
     if (in.getCurrentToken() == JsonToken.VALUE_STRING) {
       byte[] result = readByteArray();
       in.nextToken();
@@ -177,7 +184,7 @@
 
   @Override
   public void skipBytes() throws IOException {
-    parser.advance(Symbol.BYTES);
+    advance(Symbol.BYTES);
     if (in.getCurrentToken() == JsonToken.VALUE_STRING) {
       in.nextToken();
     } else {
@@ -186,7 +193,7 @@
   }
 
   private void checkFixed(int size) throws IOException {
-    parser.advance(Symbol.FIXED);
+    advance(Symbol.FIXED);
     Symbol.IntCheckAction top = (Symbol.IntCheckAction) parser.popSymbol();
     if (size != top.size) {
       throw new AvroTypeException(
@@ -232,14 +239,14 @@
 
   @Override
   protected void skipFixed() throws IOException {
-    parser.advance(Symbol.FIXED);
+    advance(Symbol.FIXED);
     Symbol.IntCheckAction top = (Symbol.IntCheckAction) parser.popSymbol();
     doSkipFixed(top.size);
   }
 
   @Override
   public int readEnum() throws IOException {
-    parser.advance(Symbol.ENUM);
+    advance(Symbol.ENUM);
     Symbol.EnumLabelsAction top = (Symbol.EnumLabelsAction) parser.popSymbol();
     if (in.getCurrentToken() == JsonToken.VALUE_STRING) {
       in.getText();
@@ -256,7 +263,7 @@
 
   @Override
   public long readArrayStart() throws IOException {
-    parser.advance(Symbol.ARRAY_START);
+    advance(Symbol.ARRAY_START);
     if (in.getCurrentToken() == JsonToken.START_ARRAY) {
       in.nextToken();
       return doArrayNext();
@@ -267,7 +274,7 @@
 
   @Override
   public long arrayNext() throws IOException {
-    parser.advance(Symbol.ITEM_END);
+    advance(Symbol.ITEM_END);
     return doArrayNext();
   }
 
@@ -283,11 +290,11 @@
 
   @Override
   public long skipArray() throws IOException {
-    parser.advance(Symbol.ARRAY_START);
+    advance(Symbol.ARRAY_START);
     if (in.getCurrentToken() == JsonToken.START_ARRAY) {
       in.skipChildren();
       in.nextToken();
-      parser.advance(Symbol.ARRAY_END);    
+      advance(Symbol.ARRAY_END);    
     } else {
       throw error("array-start");
     }
@@ -296,7 +303,7 @@
 
   @Override
   public long readMapStart() throws IOException {
-    parser.advance(Symbol.MAP_START);
+    advance(Symbol.MAP_START);
     if (in.getCurrentToken() == JsonToken.START_OBJECT) {
       in.nextToken();
       return doMapNext();
@@ -307,14 +314,14 @@
 
   @Override
   public long mapNext() throws IOException {
-    parser.advance(Symbol.ITEM_END);
+    advance(Symbol.ITEM_END);
     return doMapNext();
   }
 
   private long doMapNext() throws IOException {
     if (in.getCurrentToken() == JsonToken.END_OBJECT) {
       in.nextToken();
-      parser.advance(Symbol.MAP_END);
+      advance(Symbol.MAP_END);
       return 0;
     } else {
       return 1;
@@ -323,11 +330,11 @@
 
   @Override
   public long skipMap() throws IOException {
-    parser.advance(Symbol.MAP_START);
+    advance(Symbol.MAP_START);
     if (in.getCurrentToken() == JsonToken.START_OBJECT) {
       in.skipChildren();
       in.nextToken();
-      parser.advance(Symbol.MAP_END);    
+      advance(Symbol.MAP_END);    
     } else {
       throw error("map-start");
     }
@@ -336,7 +343,7 @@
 
   @Override
   public int readIndex() throws IOException {
-    parser.advance(Symbol.UNION);
+    advance(Symbol.UNION);
     Symbol.Alternative a = (Symbol.Alternative) parser.popSymbol();
     
     String label;
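
The private advance() wrapper added above makes JsonDecoder throw EOFException when the underlying Jackson stream is exhausted at the top level (depth 1), which is what lets the new fromjson tool read a whole stream of newline-delimited JSON datums and stop cleanly. A minimal sketch of that read-until-EOF pattern, with an illustrative int schema and inline sample input:

    import java.io.ByteArrayInputStream;
    import java.io.EOFException;
    import java.io.InputStream;

    import org.apache.avro.Schema;
    import org.apache.avro.generic.GenericDatumReader;
    import org.apache.avro.io.Decoder;
    import org.apache.avro.io.JsonDecoder;

    public class JsonStreamSketch {
      public static void main(String[] argv) throws Exception {
        Schema schema = Schema.parse("\"int\"");
        // Several newline-separated JSON datums in one stream.
        InputStream in = new ByteArrayInputStream("1\n2\n3\n".getBytes("UTF-8"));
        GenericDatumReader<Object> reader = new GenericDatumReader<Object>(schema);
        Decoder decoder = new JsonDecoder(schema, in);
        while (true) {
          Object datum;
          try {
            datum = reader.read(null, decoder);   // decode the next JSON datum
          } catch (EOFException e) {
            break;                                // input exhausted at top level
          }
          System.out.println(datum);              // prints 1, 2, 3
        }
      }
    }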

Added: hadoop/avro/trunk/src/java/org/apache/avro/tool/BinaryFragmentToJsonTool.java
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/java/org/apache/avro/tool/BinaryFragmentToJsonTool.java?rev=894740&view=auto
==============================================================================
--- hadoop/avro/trunk/src/java/org/apache/avro/tool/BinaryFragmentToJsonTool.java (added)
+++ hadoop/avro/trunk/src/java/org/apache/avro/tool/BinaryFragmentToJsonTool.java Wed Dec 30 23:43:28 2009
@@ -0,0 +1,84 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro.tool;
+
+import java.io.FileInputStream;
+import java.io.InputStream;
+import java.io.PrintStream;
+import java.util.List;
+
+import org.apache.avro.Schema;
+import org.apache.avro.io.DatumReader;
+import org.apache.avro.io.DatumWriter;
+import org.apache.avro.generic.GenericDatumReader;
+import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.avro.io.BinaryDecoder;
+import org.apache.avro.io.JsonEncoder;
+import org.codehaus.jackson.JsonEncoding;
+import org.codehaus.jackson.JsonFactory;
+import org.codehaus.jackson.JsonGenerator;
+
+/** Converts an input file from Avro binary into JSON. */
+public class BinaryFragmentToJsonTool implements Tool {
+  @Override
+  public int run(InputStream stdin, PrintStream out, PrintStream err,
+      List<String> args) throws Exception {
+    if (args.size() != 2) {
+      err.println("Expected 2 arguments: schema binary_data_file");
+      err.println("Use '-' as binary_data_file for stdin.");
+      return 1;
+    }
+    Schema schema = Schema.parse(args.get(0));
+    InputStream input;
+    boolean needsClosing;
+    if (args.get(1).equals("-")) {
+      input = stdin;
+      needsClosing = false;
+    } else {
+      input = new FileInputStream(args.get(1));
+      needsClosing = true;
+    }
+    try {
+      DatumReader<Object> reader = new GenericDatumReader<Object>(schema);
+      Object datum = reader.read(null, new BinaryDecoder(input));
+      DatumWriter<Object> writer = new GenericDatumWriter<Object>(schema);
+      JsonGenerator g =
+        new JsonFactory().createJsonGenerator(out, JsonEncoding.UTF8);
+      g.useDefaultPrettyPrinter();
+      writer.write(datum, new JsonEncoder(schema, g));
+      g.flush();
+      out.println();
+      out.flush();
+    } finally {
+      if (needsClosing) {
+        input.close();
+      }
+    }
+    return 0;
+  }
+
+  @Override
+  public String getName() {
+    return "fragtojson";
+  }
+
+  @Override
+  public String getShortDescription() {
+    return "Renders a binary-encoded Avro datum as JSON.";
+  }
+}

Added: hadoop/avro/trunk/src/java/org/apache/avro/tool/DataFileGetSchemaTool.java
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/java/org/apache/avro/tool/DataFileGetSchemaTool.java?rev=894740&view=auto
==============================================================================
--- hadoop/avro/trunk/src/java/org/apache/avro/tool/DataFileGetSchemaTool.java (added)
+++ hadoop/avro/trunk/src/java/org/apache/avro/tool/DataFileGetSchemaTool.java Wed Dec 30 23:43:28 2009
@@ -0,0 +1,54 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro.tool;
+
+import java.io.File;
+import java.io.InputStream;
+import java.io.PrintStream;
+import java.util.List;
+
+import org.apache.avro.file.DataFileReader;
+import org.apache.avro.generic.GenericDatumReader;
+
+/** Reads a data file to get its schema. */
+public class DataFileGetSchemaTool implements Tool {
+
+  @Override
+  public String getName() {
+    return "getschema";
+  }
+
+  @Override
+  public String getShortDescription() {
+    return "Prints out schema of an Avro data file.";
+  }
+
+  @Override
+  public int run(InputStream stdin, PrintStream out, PrintStream err,
+      List<String> args) throws Exception {
+    if (args.size() != 1) {
+      err.println("Expected 1 argument: input_file");
+      return 1;
+    }
+    DataFileReader<Void> reader =
+      new DataFileReader<Void>(new File(args.get(0)),
+                               new GenericDatumReader<Void>());
+    out.println(reader.getSchema().toString(true));
+    return 0;
+  }
+}

Added: hadoop/avro/trunk/src/java/org/apache/avro/tool/DataFileReadTool.java
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/java/org/apache/avro/tool/DataFileReadTool.java?rev=894740&view=auto
==============================================================================
--- hadoop/avro/trunk/src/java/org/apache/avro/tool/DataFileReadTool.java (added)
+++ hadoop/avro/trunk/src/java/org/apache/avro/tool/DataFileReadTool.java Wed Dec 30 23:43:28 2009
@@ -0,0 +1,79 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro.tool;
+
+import java.io.File;
+import java.io.InputStream;
+import java.io.PrintStream;
+import java.util.List;
+
+import org.apache.avro.Schema;
+import org.apache.avro.file.DataFileReader;
+import org.apache.avro.io.DatumWriter;
+import org.apache.avro.generic.GenericDatumReader;
+import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.avro.io.Encoder;
+import org.apache.avro.io.JsonEncoder;
+import org.codehaus.jackson.JsonGenerator;
+
+/** Reads a data file and dumps it to JSON. */
+public class DataFileReadTool implements Tool {
+
+  @Override
+  public String getName() {
+    return "tojson";
+  }
+
+  @Override
+  public String getShortDescription() {
+    return "Dumps an Avro data file as JSON, one record per line.";
+  }
+
+  @Override
+  public int run(InputStream stdin, PrintStream out, PrintStream err,
+      List<String> args) throws Exception {
+    if (args.size() != 1) {
+      // Unlike other commands, "-" can't be used for stdin, because
+      // we can only use seekable files.
+      err.println("Expected 1 argument: input_file.");
+      return 1;
+    }
+
+    GenericDatumReader<Object> reader = new GenericDatumReader<Object>();
+    DataFileReader<Object> fileReader =
+      new DataFileReader<Object>(new File(args.get(0)), reader);
+    try {
+      Schema schema = fileReader.getSchema();
+      DatumWriter<Object> writer = new GenericDatumWriter<Object>(schema);
+      Encoder encoder = new JsonEncoder(schema, (JsonGenerator)null);
+      Object datum;
+      while (null != (datum = fileReader.next(null))) {
+        // init() recreates the internal Jackson JsonGenerator
+        encoder.init(out);
+        writer.write(datum, encoder);
+        encoder.flush();
+        out.println();
+      }
+      out.flush();
+    } finally {
+      fileReader.close();
+    }
+    return 0;
+  }
+
+}

Added: hadoop/avro/trunk/src/java/org/apache/avro/tool/DataFileWriteTool.java
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/java/org/apache/avro/tool/DataFileWriteTool.java?rev=894740&view=auto
==============================================================================
--- hadoop/avro/trunk/src/java/org/apache/avro/tool/DataFileWriteTool.java (added)
+++ hadoop/avro/trunk/src/java/org/apache/avro/tool/DataFileWriteTool.java Wed Dec 30 23:43:28 2009
@@ -0,0 +1,82 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro.tool;
+
+import java.io.EOFException;
+import java.io.InputStream;
+import java.io.DataInputStream;
+import java.io.PrintStream;
+import java.util.List;
+
+import org.apache.avro.Schema;
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.io.DatumReader;
+import org.apache.avro.generic.GenericDatumReader;
+import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.avro.io.Decoder;
+import org.apache.avro.io.JsonDecoder;
+
+/** Reads newline-delimited JSON records and writes an Avro data file. */
+public class DataFileWriteTool implements Tool {
+
+  @Override
+  public String getName() {
+    return "fromjson";
+  }
+
+  @Override
+  public String getShortDescription() {
+    return "Reads JSON records and writes an Avro data file.";
+  }
+
+  @Override
+  public int run(InputStream stdin, PrintStream out, PrintStream err,
+      List<String> args) throws Exception {
+    if (args.size() != 2) {
+      err.println("Expected 2 args: schema input_file");
+      return 1;
+    }
+    
+    Schema schema = Schema.parse(args.get(0));
+    DatumReader<Object> reader = new GenericDatumReader<Object>(schema);
+    
+    InputStream input = Util.fileOrStdin(args.get(1), stdin);
+    try {
+      DataInputStream din = new DataInputStream(input);
+      DataFileWriter<Object> writer =
+        new DataFileWriter<Object>(schema, out,
+                                   new GenericDatumWriter<Object>());
+      Decoder decoder = new JsonDecoder(schema, din);
+      Object datum;
+      while (true) {
+        try {
+          datum = reader.read(null, decoder);
+        } catch (EOFException e) {
+          break;
+        }
+        writer.append(datum);
+      }
+      writer.close();
+    } finally {
+      if (input != stdin) {
+        input.close();
+      }
+    }
+    return 0;
+  }
+}

Added: hadoop/avro/trunk/src/java/org/apache/avro/tool/JsonToBinaryFragmentTool.java
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/java/org/apache/avro/tool/JsonToBinaryFragmentTool.java?rev=894740&view=auto
==============================================================================
--- hadoop/avro/trunk/src/java/org/apache/avro/tool/JsonToBinaryFragmentTool.java (added)
+++ hadoop/avro/trunk/src/java/org/apache/avro/tool/JsonToBinaryFragmentTool.java Wed Dec 30 23:43:28 2009
@@ -0,0 +1,77 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro.tool;
+
+import java.io.FileInputStream;
+import java.io.InputStream;
+import java.io.PrintStream;
+import java.util.List;
+
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericDatumReader;
+import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.avro.io.BinaryEncoder;
+import org.apache.avro.io.JsonDecoder;
+
+/** Tool to convert JSON data into Avro binary form. */
+public class JsonToBinaryFragmentTool implements Tool {
+  @Override
+  public int run(InputStream stdin, PrintStream out, PrintStream err,
+      List<String> args) throws Exception {
+    if (args.size() != 2) {
+      err.println("Expected 2 arguments: schema json_data_file");
+      err.println("Use '-' as json_data_file for stdin.");
+      return 1;
+    }
+    Schema schema = Schema.parse(args.get(0));
+    InputStream input;
+    boolean needsClosing;
+    if (args.get(1).equals("-")) {
+      input = stdin;
+      needsClosing = false;
+    } else {
+      input = new FileInputStream(args.get(1));
+      needsClosing = true;
+    }
+    
+    try {
+    GenericDatumReader<Object> reader = 
+        new GenericDatumReader<Object>(schema);
+    Object datum = reader.read(null, new JsonDecoder(schema, input));
+    
+    GenericDatumWriter<Object> writer = 
+        new GenericDatumWriter<Object>(schema);
+    writer.write(datum, new BinaryEncoder(out));
+    } finally {
+      if (needsClosing) {
+        input.close();
+      }
+    }
+    return 0;
+  }
+
+  @Override
+  public String getName() {
+    return "jsontofrag";
+  }
+
+  @Override
+  public String getShortDescription() {
+    return "Renders a JSON-encoded Avro datum as binary.";
+  }
+}

Modified: hadoop/avro/trunk/src/java/org/apache/avro/tool/Main.java
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/java/org/apache/avro/tool/Main.java?rev=894740&r1=894739&r2=894740&view=diff
==============================================================================
--- hadoop/avro/trunk/src/java/org/apache/avro/tool/Main.java (original)
+++ hadoop/avro/trunk/src/java/org/apache/avro/tool/Main.java Wed Dec 30 23:43:28 2009
@@ -37,7 +37,12 @@
     tools = new TreeMap<String, Tool>();
     for (Tool tool : new Tool[] {
         new SpecificCompilerTool(),
-        new InduceSchemaTool()
+        new InduceSchemaTool(),
+        new JsonToBinaryFragmentTool(),
+        new BinaryFragmentToJsonTool(),
+        new DataFileReadTool(),
+        new DataFileWriteTool(),
+        new DataFileGetSchemaTool(),
         }) {
       Tool prev = tools.put(tool.getName(), tool);
       if (prev != null) {

Added: hadoop/avro/trunk/src/java/org/apache/avro/tool/Util.java
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/java/org/apache/avro/tool/Util.java?rev=894740&view=auto
==============================================================================
--- hadoop/avro/trunk/src/java/org/apache/avro/tool/Util.java (added)
+++ hadoop/avro/trunk/src/java/org/apache/avro/tool/Util.java Wed Dec 30 23:43:28 2009
@@ -0,0 +1,39 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro.tool;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.InputStream;
+
+/** Static utility methods for tools. */
+class Util {
+  /**
+   * Returns stdin if filename is "-", else opens the file
+   * and returns an InputStream for it.
+   */
+  static InputStream fileOrStdin(String filename, InputStream stdin) 
+      throws FileNotFoundException {
+    if (filename.equals("-")) {
+      return stdin;
+    } else {
+      return new FileInputStream(new File(filename));
+    }
+  }
+}

Modified: hadoop/avro/trunk/src/test/bin/test_avroj.sh
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/test/bin/test_avroj.sh?rev=894740&r1=894739&r2=894740&view=diff
==============================================================================
--- hadoop/avro/trunk/src/test/bin/test_avroj.sh (original)
+++ hadoop/avro/trunk/src/test/bin/test_avroj.sh Wed Dec 30 23:43:28 2009
@@ -62,6 +62,25 @@
 $CMD induce build/test/classes 'org.apache.avro.TestReflect$C' \
  | tr -d '\n ' | grep -q -F  '{"protocol":"C"'
 ######################################################################
+# Test to/from avro (both fragments and data files)
+$CMD jsontofrag '"string"' <(echo '"Long string implies readable length encoding."') \
+ | cmp -s - <(echo -n 'ZLong string implies readable length encoding.')
+$CMD fragtojson '"string"' <(printf \\006foo) \
+ | cmp -s - <(echo '"foo"')
+# And test that stdin support (via "-") works too
+echo '"The identity function"' \
+  | $CMD jsontofrag '"string"' - \
+  | $CMD fragtojson '"string"' - \
+  | cmp -s - <(echo '"The identity function"')
+
+$CMD fromjson '"string"' <(echo '"foo"'; echo '"bar"') \
+  > $TMPDIR/data_file_write.avro
+$CMD tojson $TMPDIR/data_file_write.avro \
+  | cmp -s - <(echo '"foo"'; echo '"bar"')
+$CMD getschema $TMPDIR/data_file_write.avro \
+  | cmp -s - <(echo '"string"')
+######################################################################
+
 $CMD 2>&1 | grep -q "Available tools:"
 $CMD doesnotexist 2>&1 | grep -q "Available tools:"
 ! $CMD 2>&1 > /dev/null
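
For reference, the literal 'Z' and '\006' bytes expected by the jsontofrag/fragtojson checks above come from Avro's binary string encoding: a string is written as its zig-zag varint length followed by its UTF-8 bytes, and zigzag(45) = 90, the ASCII code of 'Z' (likewise zigzag(3) = 6 for "foo"). A small sketch that produces the same fragment programmatically (class name is illustrative):

    import java.io.ByteArrayOutputStream;

    import org.apache.avro.Schema;
    import org.apache.avro.generic.GenericDatumWriter;
    import org.apache.avro.io.BinaryEncoder;
    import org.apache.avro.util.Utf8;

    public class FragmentBytesSketch {
      public static void main(String[] argv) throws Exception {
        Schema schema = Schema.parse("\"string\"");
        GenericDatumWriter<Object> writer = new GenericDatumWriter<Object>(schema);
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        // The 45-character test string: zigzag(45) == 90 == 'Z'.
        writer.write(new Utf8("Long string implies readable length encoding."),
                     new BinaryEncoder(baos));
        byte[] bytes = baos.toByteArray();
        System.out.println((char) bytes[0]);   // prints: Z
      }
    }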

Added: hadoop/avro/trunk/src/test/java/org/apache/avro/AvroTestUtil.java
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/test/java/org/apache/avro/AvroTestUtil.java?rev=894740&view=auto
==============================================================================
--- hadoop/avro/trunk/src/test/java/org/apache/avro/AvroTestUtil.java (added)
+++ hadoop/avro/trunk/src/test/java/org/apache/avro/AvroTestUtil.java Wed Dec 30 23:43:28 2009
@@ -0,0 +1,32 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro;
+
+import java.io.File;
+
+/** Utilities for Avro tests. */
+public class AvroTestUtil {
+  static final File TMPDIR = new File(System.getProperty("test.dir", "/tmp"));
+
+  private AvroTestUtil() { }
+  
+  /** Create a temporary file in a test-appropriate directory. */
+  public static File tempFile(String name) {
+    return new File(TMPDIR, name);
+  }
+}

Added: hadoop/avro/trunk/src/test/java/org/apache/avro/tool/TestDataFileTools.java
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/test/java/org/apache/avro/tool/TestDataFileTools.java?rev=894740&view=auto
==============================================================================
--- hadoop/avro/trunk/src/test/java/org/apache/avro/tool/TestDataFileTools.java (added)
+++ hadoop/avro/trunk/src/test/java/org/apache/avro/tool/TestDataFileTools.java Wed Dec 30 23:43:28 2009
@@ -0,0 +1,181 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro.tool;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.fail;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.Arrays;
+
+import org.apache.avro.AvroTestUtil;
+import org.apache.avro.Schema;
+import org.apache.avro.Schema.Type;
+import org.apache.avro.file.DataFileReader;
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.generic.GenericDatumReader;
+import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.tools.ant.filters.StringInputStream;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class TestDataFileTools {
+  static final int COUNT = 10;
+  static File sampleFile;
+  static String jsonData;
+  static Schema schema;
+  
+  @BeforeClass
+  public static void writeSampleFile() throws IOException {
+    sampleFile = AvroTestUtil.tempFile(TestDataFileTools.class + ".avro");
+    schema = Schema.create(Type.INT);
+    
+    DataFileWriter<Object> writer = new DataFileWriter<Object>(
+        schema, sampleFile, new GenericDatumWriter<Object>(schema));
+    StringBuilder builder = new StringBuilder();
+
+    for (int i = 0; i < COUNT; ++i) {
+      builder.append(Integer.toString(i));
+      builder.append("\n");
+      writer.append(i);
+    }
+
+    writer.flush();
+    writer.close();
+    
+    jsonData = builder.toString();
+  }
+  
+  @Test
+  public void testRead() throws Exception {
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    PrintStream p = new PrintStream(baos);
+    new DataFileReadTool().run(
+        null, // stdin
+        p, // stdout
+        null, // stderr
+        Arrays.asList(sampleFile.getPath()));
+    assertEquals(jsonData.toString(), baos.toString("UTF-8"));
+  }
+  
+  @Test
+  public void testGetSchema() throws Exception {
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    PrintStream p = new PrintStream(baos);
+    new DataFileGetSchemaTool().run(
+        null, // stdin
+        p, // stdout
+        null, // stderr
+        Arrays.asList(sampleFile.getPath()));
+    assertEquals(schema.toString() + "\n", baos.toString("UTF-8"));
+  }
+  
+  @Test
+  public void testWrite() throws Exception {
+    File outFile = AvroTestUtil.tempFile(
+        TestDataFileTools.class + ".testWrite." + ".avro");
+    FileOutputStream fout = new FileOutputStream(outFile);
+    PrintStream out = new PrintStream(fout);
+    new DataFileWriteTool().run(
+        new StringInputStream(jsonData),
+        new PrintStream(out), // stdout
+        null, // stderr
+        Arrays.asList(schema.toString(), "-"));
+    out.close();
+    fout.close();
+    
+    // Read it back, and make sure it's valid.
+    GenericDatumReader<Object> reader = new GenericDatumReader<Object>();
+    DataFileReader<Object> fileReader = new DataFileReader<Object>(outFile,reader);
+    Object datum;
+    int i = 0;
+    while (null != (datum = fileReader.next(null))) {
+      assertEquals(i, datum);
+      i++;
+    }
+    assertEquals(COUNT, i);
+    assertEquals(schema, fileReader.getSchema());
+    assertEquals(COUNT, fileReader.getCount());
+  }
+  
+  @Test
+  public void testFailureOnWritingPartialJSONValues() throws Exception {
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    PrintStream out = new PrintStream(baos);
+    try {
+      new DataFileWriteTool().run(
+          new StringInputStream("{"),
+          new PrintStream(out), // stdout
+          null, // stderr
+          Arrays.asList("{ \"type\":\"record\", \"fields\":" +
+                        "[{\"name\":\"foo\", \"type\":\"string\"}], " +
+                        "\"name\":\"boring\" }", "-"));
+      fail("Expected exception.");
+    } catch (IOException expected) {
+      // expected
+    }
+  }
+  
+  @Test
+  public void testWritingZeroJsonValues() throws Exception {
+    File outFile = writeToAvroFile("zerojsonvalues",
+        schema.toString(),
+        "");
+    assertEquals(0, countRecords(outFile));
+  }
+  
+  private int countRecords(File outFile) throws IOException {
+    GenericDatumReader<Object> reader = new GenericDatumReader<Object>();
+    DataFileReader<Object> fileReader = 
+      new DataFileReader<Object>(outFile,reader);
+    int i = 0;
+    while (null != fileReader.next(null)) {
+      i++;
+    }
+    return i;
+  }
+
+  @Test
+  public void testDifferentSeparatorsBetweenJsonRecords() throws Exception {
+    File outFile = writeToAvroFile(
+        "separators", 
+        "{ \"type\":\"array\", \"items\":\"int\" }", 
+        "[]    [] []\n[][3]     ");
+    assertEquals(5, countRecords(outFile));
+  }
+  
+  public File writeToAvroFile(String testName, String schema, String json) throws Exception {
+    File outFile = AvroTestUtil.tempFile(
+        TestDataFileTools.class + "." + testName + ".avro");
+    FileOutputStream fout = new FileOutputStream(outFile);
+    PrintStream out = new PrintStream(fout);
+    new DataFileWriteTool().run(
+        new StringInputStream(json),
+        new PrintStream(out), // stdout
+        null, // stderr
+        Arrays.asList(schema, "-"));
+    out.close();
+    fout.close();
+    return outFile;
+  }
+  
+}

Added: hadoop/avro/trunk/src/test/java/org/apache/avro/tool/TestJsonToFromBinaryFragmentTools.java
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/test/java/org/apache/avro/tool/TestJsonToFromBinaryFragmentTools.java?rev=894740&view=auto
==============================================================================
--- hadoop/avro/trunk/src/test/java/org/apache/avro/tool/TestJsonToFromBinaryFragmentTools.java (added)
+++ hadoop/avro/trunk/src/test/java/org/apache/avro/tool/TestJsonToFromBinaryFragmentTools.java Wed Dec 30 23:43:28 2009
@@ -0,0 +1,66 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro.tool;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.PrintStream;
+import java.util.Arrays;
+
+import org.apache.avro.Schema;
+import org.junit.Test;
+
+/**
+ * Tests both {@link JsonToBinaryFragmentTool}
+ * and {@link BinaryFragmentToJsonTool}.
+ */
+public class TestJsonToFromBinaryFragmentTools {
+  private static final Schema STRING_SCHEMA = Schema.parse("\"string\"");
+  private static final String UTF8 = "utf-8";
+  private static final String AVRO = 
+    "ZLong string implies readable length encoding.";
+  private static final String JSON = 
+    "\"Long string implies readable length encoding.\"";
+
+  @Test
+  public void testBinaryToJson() throws Exception {
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    PrintStream p = new PrintStream(baos);
+    
+    new BinaryFragmentToJsonTool().run(
+        new ByteArrayInputStream(AVRO.getBytes(UTF8)), // stdin
+        p, // stdout
+        null, // stderr
+        Arrays.asList(STRING_SCHEMA.toString(), "-"));
+    assertEquals(JSON + "\n", baos.toString(UTF8));
+  }
+  
+  @Test
+  public void testJsonToBinary() throws Exception {
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    PrintStream p = new PrintStream(baos);
+    new JsonToBinaryFragmentTool().run(
+        new ByteArrayInputStream(JSON.getBytes(UTF8)), // stdin
+        p, // stdout
+        null, // stderr
+        Arrays.asList(STRING_SCHEMA.toString(), "-"));
+    assertEquals(AVRO, baos.toString(UTF8));
+  }
+}

