This is an automated email from the ASF dual-hosted git repository.

rskraba pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/avro.git


The following commit(s) were added to refs/heads/master by this push:
     new afbda07  AVRO-2689:  add reader-schema to DataFileReadTool (#785)
afbda07 is described below

commit afbda0798a4fe662bd5379cf535da79800c4e840
Author: Roger Peppe <[email protected]>
AuthorDate: Tue Jan 28 10:03:38 2020 +0000

    AVRO-2689:  add reader-schema to DataFileReadTool (#785)
    
    * AVRO-2689: add reader schema to DataFileReadTool
    
    This PR adds --reader-schema and --reader-schema-file flags
    to the tojson tool.
    
    * AVRO-2689: Unit test for reader schema on tojson tool.
    
    Co-authored-by: RyanSkraba <[email protected]>
---
 .../org/apache/avro/tool/DataFileReadTool.java     | 21 +++++++++++++++++++--
 .../org/apache/avro/tool/TestDataFileTools.java    | 22 ++++++++++++++++++++++
 2 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/lang/java/tools/src/main/java/org/apache/avro/tool/DataFileReadTool.java b/lang/java/tools/src/main/java/org/apache/avro/tool/DataFileReadTool.java
index 8d4ef00..fb5ef72 100644
--- a/lang/java/tools/src/main/java/org/apache/avro/tool/DataFileReadTool.java
+++ b/lang/java/tools/src/main/java/org/apache/avro/tool/DataFileReadTool.java
@@ -56,11 +56,25 @@ public class DataFileReadTool implements Tool {
     OptionSpec<Void> prettyOption = optionParser.accepts("pretty", "Turns on pretty printing.");
     String headDesc = String.format("Converts the first X records (default is %d).", DEFAULT_HEAD_COUNT);
     OptionSpec<String> headOption = optionParser.accepts("head", headDesc).withOptionalArg();
+    OptionSpec<String> readerSchemaFileOption = optionParser.accepts("reader-schema-file", "Reader schema file")
+        .withOptionalArg().ofType(String.class);
+    OptionSpec<String> readerSchemaOption = optionParser.accepts("reader-schema", "Reader schema").withOptionalArg()
+        .ofType(String.class);
 
     OptionSet optionSet = optionParser.parse(args.toArray(new String[0]));
     Boolean pretty = optionSet.has(prettyOption);
     List<String> nargs = new ArrayList<>((List<String>) optionSet.nonOptionArguments());
 
+    String readerSchemaStr = readerSchemaOption.value(optionSet);
+    String readerSchemaFile = readerSchemaFileOption.value(optionSet);
+
+    Schema readerSchema = null;
+    if (readerSchemaFile != null) {
+      readerSchema = Util.parseSchemaFromFS(readerSchemaFile);
+    } else if (readerSchemaStr != null) {
+      readerSchema = new Schema.Parser().parse(readerSchemaStr);
+    }
+
     long headCount = getHeadCount(optionSet, headOption, nargs);
 
     if (nargs.size() != 1) {
@@ -73,9 +87,12 @@ public class DataFileReadTool implements Tool {
     BufferedInputStream inStream = Util.fileOrStdin(nargs.get(0), stdin);
 
     GenericDatumReader<Object> reader = new GenericDatumReader<>();
+    if (readerSchema != null) {
+      reader.setExpected(readerSchema);
+    }
     try (DataFileStream<Object> streamReader = new DataFileStream<>(inStream, reader)) {
-      Schema schema = streamReader.getSchema();
-      DatumWriter<Object> writer = new GenericDatumWriter<>(schema);
+      Schema schema = readerSchema != null ? readerSchema : streamReader.getSchema();
+      DatumWriter writer = new GenericDatumWriter<>(schema);
       JsonEncoder encoder = EncoderFactory.get().jsonEncoder(schema, out, pretty);
       for (long recordCount = 0; streamReader.hasNext() && recordCount < headCount; recordCount++) {
         Object datum = streamReader.next();
diff --git a/lang/java/tools/src/test/java/org/apache/avro/tool/TestDataFileTools.java b/lang/java/tools/src/test/java/org/apache/avro/tool/TestDataFileTools.java
index 00538aa..7f694c8 100644
--- a/lang/java/tools/src/test/java/org/apache/avro/tool/TestDataFileTools.java
+++ b/lang/java/tools/src/test/java/org/apache/avro/tool/TestDataFileTools.java
@@ -35,6 +35,7 @@ import java.util.Collections;
 import java.util.List;
 
 import org.apache.avro.AvroRuntimeException;
+import org.apache.avro.AvroTypeException;
 import org.apache.avro.Schema;
 import org.apache.avro.Schema.Type;
 import org.apache.avro.file.DataFileReader;
@@ -114,6 +115,27 @@ public class TestDataFileTools {
   }
 
   @Test
+  public void testReadWithReaderSchema() throws Exception {
+    assertEquals(jsonData, run(new DataFileReadTool(), "--reader-schema", "\"long\"", sampleFile.getPath()));
+  }
+
+  @Test(expected = AvroTypeException.class)
+  public void testReadWithIncompatibleReaderSchema() throws Exception {
+    // Fails: an int can't be read as a string.
+    run(new DataFileReadTool(), "--reader-schema", "\"string\"", sampleFile.getPath());
+  }
+
+  @Test
+  public void testReadWithReaderSchemaFile() throws Exception {
+    File readerSchemaFile = new File(DIR.getRoot(), "reader-schema-temp.schema");
+    try (FileWriter fw = new FileWriter(readerSchemaFile)) {
+      fw.append("\"long\"");
+    }
+    assertEquals(jsonData,
+        run(new DataFileReadTool(), "--reader-schema-file", readerSchemaFile.getPath(), sampleFile.getPath()));
+  }
+
+  @Test
   public void testReadHeadDefaultCount() throws Exception {
     String expectedJson = jsonData.substring(0, 20); // first 10 numbers
     assertEquals(expectedJson, run(new DataFileReadTool(), "--head", sampleFile.getPath()));

Reply via email to