This is an automated email from the ASF dual-hosted git repository.
rskraba pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/avro.git
The following commit(s) were added to refs/heads/master by this push:
new afbda07 AVRO-2689: add reader-schema to DataFileReadTool (#785)
afbda07 is described below
commit afbda0798a4fe662bd5379cf535da79800c4e840
Author: Roger Peppe <[email protected]>
AuthorDate: Tue Jan 28 10:03:38 2020 +0000
AVRO-2689: add reader-schema to DataFileReadTool (#785)
* AVRO-2689: add reader schema to DataFileReadTool
This PR adds --reader-schema and --reader-schema-file flags
to the tojson tool.
* AVRO-2689: Unit test for reader schema on tojson tool.
Co-authored-by: RyanSkraba <[email protected]>
---
.../org/apache/avro/tool/DataFileReadTool.java | 21 +++++++++++++++++++--
.../org/apache/avro/tool/TestDataFileTools.java | 22 ++++++++++++++++++++++
2 files changed, 41 insertions(+), 2 deletions(-)
diff --git a/lang/java/tools/src/main/java/org/apache/avro/tool/DataFileReadTool.java b/lang/java/tools/src/main/java/org/apache/avro/tool/DataFileReadTool.java
index 8d4ef00..fb5ef72 100644
--- a/lang/java/tools/src/main/java/org/apache/avro/tool/DataFileReadTool.java
+++ b/lang/java/tools/src/main/java/org/apache/avro/tool/DataFileReadTool.java
@@ -56,11 +56,25 @@ public class DataFileReadTool implements Tool {
OptionSpec<Void> prettyOption = optionParser.accepts("pretty", "Turns on
pretty printing.");
String headDesc = String.format("Converts the first X records (default is
%d).", DEFAULT_HEAD_COUNT);
OptionSpec<String> headOption = optionParser.accepts("head",
headDesc).withOptionalArg();
+ OptionSpec<String> readerSchemaFileOption =
optionParser.accepts("reader-schema-file", "Reader schema file")
+ .withOptionalArg().ofType(String.class);
+ OptionSpec<String> readerSchemaOption =
optionParser.accepts("reader-schema", "Reader schema").withOptionalArg()
+ .ofType(String.class);
OptionSet optionSet = optionParser.parse(args.toArray(new String[0]));
Boolean pretty = optionSet.has(prettyOption);
List<String> nargs = new ArrayList<>((List<String>)
optionSet.nonOptionArguments());
+ String readerSchemaStr = readerSchemaOption.value(optionSet);
+ String readerSchemaFile = readerSchemaFileOption.value(optionSet);
+
+ Schema readerSchema = null;
+ if (readerSchemaFile != null) {
+ readerSchema = Util.parseSchemaFromFS(readerSchemaFile);
+ } else if (readerSchemaStr != null) {
+ readerSchema = new Schema.Parser().parse(readerSchemaStr);
+ }
+
long headCount = getHeadCount(optionSet, headOption, nargs);
if (nargs.size() != 1) {
@@ -73,9 +87,12 @@ public class DataFileReadTool implements Tool {
BufferedInputStream inStream = Util.fileOrStdin(nargs.get(0), stdin);
GenericDatumReader<Object> reader = new GenericDatumReader<>();
+ if (readerSchema != null) {
+ reader.setExpected(readerSchema);
+ }
try (DataFileStream<Object> streamReader = new DataFileStream<>(inStream,
reader)) {
- Schema schema = streamReader.getSchema();
- DatumWriter<Object> writer = new GenericDatumWriter<>(schema);
+ Schema schema = readerSchema != null ? readerSchema :
streamReader.getSchema();
+ DatumWriter writer = new GenericDatumWriter<>(schema);
JsonEncoder encoder = EncoderFactory.get().jsonEncoder(schema, out,
pretty);
for (long recordCount = 0; streamReader.hasNext() && recordCount <
headCount; recordCount++) {
Object datum = streamReader.next();
diff --git a/lang/java/tools/src/test/java/org/apache/avro/tool/TestDataFileTools.java b/lang/java/tools/src/test/java/org/apache/avro/tool/TestDataFileTools.java
index 00538aa..7f694c8 100644
--- a/lang/java/tools/src/test/java/org/apache/avro/tool/TestDataFileTools.java
+++ b/lang/java/tools/src/test/java/org/apache/avro/tool/TestDataFileTools.java
@@ -35,6 +35,7 @@ import java.util.Collections;
import java.util.List;
import org.apache.avro.AvroRuntimeException;
+import org.apache.avro.AvroTypeException;
import org.apache.avro.Schema;
import org.apache.avro.Schema.Type;
import org.apache.avro.file.DataFileReader;
@@ -114,6 +115,27 @@ public class TestDataFileTools {
}
@Test
+ public void testReadWithReaderSchema() throws Exception {
+ assertEquals(jsonData, run(new DataFileReadTool(), "--reader-schema",
"\"long\"", sampleFile.getPath()));
+ }
+
+ @Test(expected = AvroTypeException.class)
+ public void testReadWithIncompatibleReaderSchema() throws Exception {
+ // Fails: an int can't be read as a string.
+ run(new DataFileReadTool(), "--reader-schema", "\"string\"",
sampleFile.getPath());
+ }
+
+ @Test
+ public void testReadWithReaderSchemaFile() throws Exception {
+ File readerSchemaFile = new File(DIR.getRoot(),
"reader-schema-temp.schema");
+ try (FileWriter fw = new FileWriter(readerSchemaFile)) {
+ fw.append("\"long\"");
+ }
+ assertEquals(jsonData,
+ run(new DataFileReadTool(), "--reader-schema-file",
readerSchemaFile.getPath(), sampleFile.getPath()));
+ }
+
+ @Test
public void testReadHeadDefaultCount() throws Exception {
String expectedJson = jsonData.substring(0, 20); // first 10 numbers
assertEquals(expectedJson, run(new DataFileReadTool(), "--head",
sampleFile.getPath()));