[
https://issues.apache.org/jira/browse/AVRO-2890?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Sharath Avadoot Gururaj updated AVRO-2890:
------------------------------------------
Description:
Consider the following schema:
{code:java}
{"namespace": "example.avro",
"type": "record",
"name": "Nic",
"fields": [
{"name" : "ip", "type" : "string", "default" : ""}
]
}
{code}
and the following empty JSON:
{code:java}
{}{code}
I expect parsing to succeed with this code:
{code:java}
public void jsonToAvro() throws Exception {
JsonParser parser;
Schema schema = new
Schema.Parser().parse(readClasspathFile(s.schema));
Decoder decoder;
JsonFactory factory = new JsonFactory();
if(s.linesep) {
parser =
factory.createParser(Files.newInputStream(Paths.get(s.input)));
decoder = DecoderFactory.get().jsonDecoder(schema,
Files.newInputStream(Paths.get(s.input)));
} else {
parser =
factory.createParser(Files.readAllBytes(Paths.get(s.input)));
decoder = DecoderFactory.get().jsonDecoder(schema, new
String(Files.readAllBytes(Paths.get(s.input))));
}
parser.configure(JsonParser.Feature.INCLUDE_SOURCE_IN_LOCATION,
true);
// Decoder decoder = new ExtendedJsonDecoder(schema, parser, true );
DataFileWriter<GenericRecord> writer;
CountingOutputStream output = new
CountingOutputStream(Files.newOutputStream(Paths.get(s.output)));
DatumReader<GenericRecord> reader = new
GenericDatumReader<>(schema);
writer = new DataFileWriter<>(new GenericDatumWriter<>());
writer.create(schema, output);
// Decoder decoder = new ExtendedJsonDecoder(schema, parser, true );
GenericRecord datum = null;
while (true) {
try {
datum = reader.read(datum, decoder);
} catch (EOFException eofe) {
break;
}
writer.append(datum);
}
writer.flush();
}
{code}
But I get the following error:
{noformat}
org.apache.avro.AvroTypeException: Expected field name not found: ip
at org.apache.avro.io.JsonDecoder.doAction(JsonDecoder.java:473)
~[avro-1.10.0.jar:1.10.0]
at org.apache.avro.io.parsing.Parser.advance(Parser.java:86)
~[avro-1.10.0.jar:1.10.0]
at org.apache.avro.io.JsonDecoder.advance(JsonDecoder.java:132)
~[avro-1.10.0.jar:1.10.0]
at org.apache.avro.io.JsonDecoder.readString(JsonDecoder.java:212)
~[avro-1.10.0.jar:1.10.0]
at org.apache.avro.io.JsonDecoder.readString(JsonDecoder.java:207)
~[avro-1.10.0.jar:1.10.0]
at
org.apache.avro.io.ResolvingDecoder.readString(ResolvingDecoder.java:208)
~[avro-1.10.0.jar:1.10.0]
at
org.apache.avro.generic.GenericDatumReader.readString(GenericDatumReader.java:469)
~[avro-1.10.0.jar:1.10.0]
at
org.apache.avro.generic.GenericDatumReader.readString(GenericDatumReader.java:459)
~[avro-1.10.0.jar:1.10.0]
at
org.apache.avro.generic.GenericDatumReader.readWithoutConversion(GenericDatumReader.java:191)
~[avro-1.10.0.jar:1.10.0]
at
org.apache.avro.generic.GenericDatumReader.read(GenericDatumReader.java:160)
~[avro-1.10.0.jar:1.10.0]
at
org.apache.avro.generic.GenericDatumReader.readField(GenericDatumReader.java:259)
~[avro-1.10.0.jar:1.10.0]
at
org.apache.avro.generic.GenericDatumReader.readRecord(GenericDatumReader.java:247)
~[avro-1.10.0.jar:1.10.0]
at
org.apache.avro.generic.GenericDatumReader.readWithoutConversion(GenericDatumReader.java:179)
~[avro-1.10.0.jar:1.10.0]
at
org.apache.avro.generic.GenericDatumReader.read(GenericDatumReader.java:160)
~[avro-1.10.0.jar:1.10.0]
at
org.apache.avro.generic.GenericDatumReader.read(GenericDatumReader.java:153)
~[avro-1.10.0.jar:1.10.0]
at sha.Deser.jsonToAvro(Deser.java:101) ~[classes/:?]
at sha.Deser.go(Deser.java:70) ~[classes/:?]
at sha.Deser.main(Deser.java:43) [classes/:?]
{noformat}
was:
Consider the following schema:
{code:java}
{"namespace": "example.avro", "type": "record", "name": "Nic", "fields": [
{"name" : "ip", "type" : "string", "default" : ""} ] }
{code}
and the following empty json
{code:java}
{}{code}
I expect that parsing is successful with this code
{code:java}
public void jsonToAvro() throws Exception { JsonParser parser; Schema schema =
new Schema.Parser().parse(readClasspathFile(s.schema)); Decoder decoder;
JsonFactory factory = new JsonFactory(); if(s.linesep) { parser =
factory.createParser(Files.newInputStream(Paths.get(s.input))); decoder =
DecoderFactory.get().jsonDecoder(schema,
Files.newInputStream(Paths.get(s.input))); } else { parser =
factory.createParser(Files.readAllBytes(Paths.get(s.input))); decoder =
DecoderFactory.get().jsonDecoder(schema, new
String(Files.readAllBytes(Paths.get(s.input)))); }
parser.configure(JsonParser.Feature.INCLUDE_SOURCE_IN_LOCATION, true); //
Decoder decoder = new ExtendedJsonDecoder(schema, parser, true );
DataFileWriter<GenericRecord> writer; CountingOutputStream output = new
CountingOutputStream(Files.newOutputStream(Paths.get(s.output)));
DatumReader<GenericRecord> reader = new GenericDatumReader<>(schema); writer =
new DataFileWriter<>(new GenericDatumWriter<>()); writer.create(schema,
output); // Decoder decoder = new ExtendedJsonDecoder(schema, parser, true );
GenericRecord datum = null; while (true) { try { datum = reader.read(datum,
decoder); } catch (EOFException eofe) { break; } writer.append(datum); }
writer.flush(); }
{code}
But I get the following error
{noformat}
org.apache.avro.AvroTypeException: Expected field name not found: ip at
org.apache.avro.io.JsonDecoder.doAction(JsonDecoder.java:473)
~[avro-1.10.0.jar:1.10.0] at
org.apache.avro.io.parsing.Parser.advance(Parser.java:86)
~[avro-1.10.0.jar:1.10.0] at
org.apache.avro.io.JsonDecoder.advance(JsonDecoder.java:132)
~[avro-1.10.0.jar:1.10.0] at
org.apache.avro.io.JsonDecoder.readString(JsonDecoder.java:212)
~[avro-1.10.0.jar:1.10.0] at
org.apache.avro.io.JsonDecoder.readString(JsonDecoder.java:207)
~[avro-1.10.0.jar:1.10.0] at
org.apache.avro.io.ResolvingDecoder.readString(ResolvingDecoder.java:208)
~[avro-1.10.0.jar:1.10.0] at
org.apache.avro.generic.GenericDatumReader.readString(GenericDatumReader.java:469)
~[avro-1.10.0.jar:1.10.0] at
org.apache.avro.generic.GenericDatumReader.readString(GenericDatumReader.java:459)
~[avro-1.10.0.jar:1.10.0] at
org.apache.avro.generic.GenericDatumReader.readWithoutConversion(GenericDatumReader.java:191)
~[avro-1.10.0.jar:1.10.0] at
org.apache.avro.generic.GenericDatumReader.read(GenericDatumReader.java:160)
~[avro-1.10.0.jar:1.10.0] at
org.apache.avro.generic.GenericDatumReader.readField(GenericDatumReader.java:259)
~[avro-1.10.0.jar:1.10.0] at
org.apache.avro.generic.GenericDatumReader.readRecord(GenericDatumReader.java:247)
~[avro-1.10.0.jar:1.10.0] at
org.apache.avro.generic.GenericDatumReader.readWithoutConversion(GenericDatumReader.java:179)
~[avro-1.10.0.jar:1.10.0] at
org.apache.avro.generic.GenericDatumReader.read(GenericDatumReader.java:160)
~[avro-1.10.0.jar:1.10.0] at
org.apache.avro.generic.GenericDatumReader.read(GenericDatumReader.java:153)
~[avro-1.10.0.jar:1.10.0] at sha.Deser.jsonToAvro(Deser.java:101) ~[classes/:?]
at sha.Deser.go(Deser.java:70) ~[classes/:?] at sha.Deser.main(Deser.java:43)
[classes/:?]
{noformat}
> java JSON decoder does not respect default values for fields
> ------------------------------------------------------------
>
> Key: AVRO-2890
> URL: https://issues.apache.org/jira/browse/AVRO-2890
> Project: Apache Avro
> Issue Type: Bug
> Components: java
> Affects Versions: 1.10.0
> Reporter: Sharath Avadoot Gururaj
> Priority: Major
>
> Consider the following schema:
> {code:java}
> {"namespace": "example.avro",
> "type": "record",
> "name": "Nic",
> "fields": [
> {"name" : "ip", "type" : "string", "default" : ""}
> ]
> }
> {code}
> and the following empty JSON:
> {code:java}
> {}{code}
> I expect parsing to succeed with this code:
> {code:java}
> public void jsonToAvro() throws Exception {
> JsonParser parser;
> Schema schema = new
> Schema.Parser().parse(readClasspathFile(s.schema));
> Decoder decoder;
> JsonFactory factory = new JsonFactory();
> if(s.linesep) {
> parser =
> factory.createParser(Files.newInputStream(Paths.get(s.input)));
> decoder = DecoderFactory.get().jsonDecoder(schema,
> Files.newInputStream(Paths.get(s.input)));
> } else {
> parser =
> factory.createParser(Files.readAllBytes(Paths.get(s.input)));
> decoder = DecoderFactory.get().jsonDecoder(schema, new
> String(Files.readAllBytes(Paths.get(s.input))));
> }
> parser.configure(JsonParser.Feature.INCLUDE_SOURCE_IN_LOCATION,
> true);
> // Decoder decoder = new ExtendedJsonDecoder(schema, parser, true );
> DataFileWriter<GenericRecord> writer;
> CountingOutputStream output = new
> CountingOutputStream(Files.newOutputStream(Paths.get(s.output)));
> DatumReader<GenericRecord> reader = new
> GenericDatumReader<>(schema);
> writer = new DataFileWriter<>(new GenericDatumWriter<>());
> writer.create(schema, output);
> // Decoder decoder = new ExtendedJsonDecoder(schema, parser, true );
> GenericRecord datum = null;
> while (true) {
> try {
> datum = reader.read(datum, decoder);
> } catch (EOFException eofe) {
> break;
> }
> writer.append(datum);
> }
> writer.flush();
> }
> {code}
> But I get the following error:
>
> {noformat}
> org.apache.avro.AvroTypeException: Expected field name not found: ip
> at org.apache.avro.io.JsonDecoder.doAction(JsonDecoder.java:473)
> ~[avro-1.10.0.jar:1.10.0]
> at org.apache.avro.io.parsing.Parser.advance(Parser.java:86)
> ~[avro-1.10.0.jar:1.10.0]
> at org.apache.avro.io.JsonDecoder.advance(JsonDecoder.java:132)
> ~[avro-1.10.0.jar:1.10.0]
> at org.apache.avro.io.JsonDecoder.readString(JsonDecoder.java:212)
> ~[avro-1.10.0.jar:1.10.0]
> at org.apache.avro.io.JsonDecoder.readString(JsonDecoder.java:207)
> ~[avro-1.10.0.jar:1.10.0]
> at
> org.apache.avro.io.ResolvingDecoder.readString(ResolvingDecoder.java:208)
> ~[avro-1.10.0.jar:1.10.0]
> at
> org.apache.avro.generic.GenericDatumReader.readString(GenericDatumReader.java:469)
> ~[avro-1.10.0.jar:1.10.0]
> at
> org.apache.avro.generic.GenericDatumReader.readString(GenericDatumReader.java:459)
> ~[avro-1.10.0.jar:1.10.0]
> at
> org.apache.avro.generic.GenericDatumReader.readWithoutConversion(GenericDatumReader.java:191)
> ~[avro-1.10.0.jar:1.10.0]
> at
> org.apache.avro.generic.GenericDatumReader.read(GenericDatumReader.java:160)
> ~[avro-1.10.0.jar:1.10.0]
> at
> org.apache.avro.generic.GenericDatumReader.readField(GenericDatumReader.java:259)
> ~[avro-1.10.0.jar:1.10.0]
> at
> org.apache.avro.generic.GenericDatumReader.readRecord(GenericDatumReader.java:247)
> ~[avro-1.10.0.jar:1.10.0]
> at
> org.apache.avro.generic.GenericDatumReader.readWithoutConversion(GenericDatumReader.java:179)
> ~[avro-1.10.0.jar:1.10.0]
> at
> org.apache.avro.generic.GenericDatumReader.read(GenericDatumReader.java:160)
> ~[avro-1.10.0.jar:1.10.0]
> at
> org.apache.avro.generic.GenericDatumReader.read(GenericDatumReader.java:153)
> ~[avro-1.10.0.jar:1.10.0]
> at sha.Deser.jsonToAvro(Deser.java:101) ~[classes/:?]
> at sha.Deser.go(Deser.java:70) ~[classes/:?]
> at sha.Deser.main(Deser.java:43) [classes/:?]
> {noformat}
>
--
This message was sent by Atlassian Jira
(v8.3.4#803005)