[
https://issues.apache.org/jira/browse/AVRO-1176?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Thiruvalluvan M. G. reassigned AVRO-1176:
-----------------------------------------
Assignee: Thiruvalluvan M. G.
> ResolvingDecoder fails to resolve or parse schemas
> --------------------------------------------------
>
> Key: AVRO-1176
> URL: https://issues.apache.org/jira/browse/AVRO-1176
> Project: Apache Avro
> Issue Type: Bug
> Components: c++
> Affects Versions: 1.7.0
> Reporter: Keh-Li Sheng
> Assignee: Thiruvalluvan M. G.
> Priority: Major
> Labels: patch
> Fix For: 1.9.0
>
> Attachments: AVRO-1176.patch
>
>
> We have encountered a number of problems using ResolvingDecoder in the C++
> project that we can trace to the following causes:
> 1. Incorrectly swapped reader/writer arguments passed to
> ResolvingGrammarGenerator::generate()
> 2. Using the wrong tree in ResolvingGrammarGenerator::generate() to generate
> the backup parsing stack
> 3. A decoder has no "hook" into the generated codec_traits decode methods for
> Specific that advances the resolved parse tree through the Symbol::sSkipStart
> nodes to ignore extra or unknown fields in the writer's data.
> 4. A resolving decoder can generate a valid decoded object even when there are
> garbage characters at the end of the input stream, provided those characters
> appear in a field that the reader schema is unaware of.
> The reader/writer schemas that fail to parse properly are shown below. The
> first example is the writer adding a field to a record that is inside an array.
> {code:title=Added field to record inside array}
> {
> std::string
> readerString("{\"type\":\"record\",\"name\":\"Outer\",\"fields\":[{\"name\":\"outerArray\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"InArray\",\"fields\":[{\"name\":\"first\",\"type\":{\"type\":\"record\",\"name\":\"Inner1\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"second\",\"type\":{\"type\":\"record\",\"name\":\"Inner2\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}}]}}}]}");
> std::string
> writerString("{\"type\":\"record\",\"name\":\"Outer\",\"fields\":[{\"name\":\"outerArray\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"InArray\",\"fields\":[{\"name\":\"first\",\"type\":{\"type\":\"record\",\"name\":\"Inner1\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"second\",\"type\":{\"type\":\"record\",\"name\":\"Inner2\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"third\",\"type\":{\"type\":\"record\",\"name\":\"Inner3\",\"fields\":[{\"name\":\"number\",\"type\":\"int\"}]}}]}}}]}");
> std::stringstream readerStream(readerString);
> std::stringstream writerStream(writerString);
>
> avro::ValidSchema readerSchema;
> avro::ValidSchema writerSchema;
>
> avro::compileJsonSchema(readerStream, readerSchema);
> avro::compileJsonSchema(writerStream, writerSchema);
>
> avro::DecoderPtr decoder = avro::resolvingDecoder(writerSchema,
> readerSchema, avro::jsonDecoder(writerSchema));
> struct Outer outer;
>
> std::stringstream
> jsonStream("{\"outerArray\":[{\"first\":{\"field\":\"here is a string
> field\"},\"second\":{\"field\":\"here is another string
> field\"},\"third\":{\"number\":3}},{\"first\":{\"field\":\"cool\"},\"second\":{\"field\":\"beans\"},\"third\":{\"number\":4}}]}");
> std::auto_ptr<avro::InputStream> input =
> avro::istreamInputStream(jsonStream);
> decoder->init(*input);
> avro::decode(*decoder, outer);
> }
> {code}
> {code:title=Additional array of writer-only record}
> {
> std::string
> readerString("{\"type\":\"record\",\"name\":\"OuterExtra\",\"fields\":[{\"name\":\"extraArray\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"InArrayExtraArray\",\"fields\":[{\"name\":\"first\",\"type\":{\"type\":\"record\",\"name\":\"Inner1\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"second\",\"type\":{\"type\":\"record\",\"name\":\"Inner2\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}}]}}}]}");
> std::string
> writerString("{\"type\":\"record\",\"name\":\"OuterExtra\",\"fields\":[{\"name\":\"extraArray\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"InArrayExtraArray\",\"fields\":[{\"name\":\"first\",\"type\":{\"type\":\"record\",\"name\":\"Inner1\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"second\",\"type\":{\"type\":\"record\",\"name\":\"Inner2\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"innerArray\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"Inner3\",\"fields\":[{\"name\":\"number\",\"type\":\"int\"}]}}}]}}}]}");
> std::stringstream readerStream(readerString);
> std::stringstream writerStream(writerString);
>
> avro::ValidSchema readerSchema;
> avro::ValidSchema writerSchema;
>
> avro::compileJsonSchema(readerStream, readerSchema);
> avro::compileJsonSchema(writerStream, writerSchema);
>
> avro::DecoderPtr decoder = avro::resolvingDecoder(writerSchema,
> readerSchema, avro::jsonDecoder(writerSchema));
> struct Outer outer;
>
> std::stringstream
> jsonStream("{\"extraArray\":[{\"first\":{\"field\":\"here is a string
> field\"},\"second\":{\"field\":\"here is another string
> field\"},\"innerArray\":[{\"number\":1},{\"number\":2},{\"number\":3}]},{\"first\":{\"field\":\"second
> item in array\"},\"second\":{\"field\":\"inner2 field of
> 2\"},\"innerArray\":[{\"number\":4},{\"number\":5}]},{\"first\":{\"field\":\"third
> item in array\"},\"second\":{\"field\":\"inner2 field of
> 3\"},\"innerArray\":[{\"number\":6}]}]}");
> std::auto_ptr<avro::InputStream> input =
> avro::istreamInputStream(jsonStream);
> decoder->init(*input);
> avro::decode(*decoder, outer);
> }
> {code}
> {code:title=Multiple nesting of unknown records}
> {
> std::string
> readerString("{\"type\":\"record\",\"name\":\"CombinationExtra\",\"fields\":[{\"name\":\"outerAsField\",\"type\":{\"type\":\"record\",\"name\":\"OuterExtra\",\"fields\":[{\"name\":\"extraArray\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"InArrayExtraArray\",\"fields\":[{\"name\":\"first\",\"type\":{\"type\":\"record\",\"name\":\"Inner1\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"second\",\"type\":{\"type\":\"record\",\"name\":\"Inner2\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}}]}}}]}}]}");
> std::string
> writerString("{\"type\":\"record\",\"name\":\"CombinationExtra\",\"fields\":[{\"name\":\"outerAsField\",\"type\":{\"type\":\"record\",\"name\":\"OuterExtra\",\"fields\":[{\"name\":\"extraArray\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"InArrayExtraArray\",\"fields\":[{\"name\":\"first\",\"type\":{\"type\":\"record\",\"name\":\"Inner1\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"second\",\"type\":{\"type\":\"record\",\"name\":\"Inner2\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"innerArray\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"Inner3\",\"fields\":[{\"name\":\"number\",\"type\":\"int\"}]}}}]}}}]}}]}");
> std::stringstream readerStream(readerString);
> std::stringstream writerStream(writerString);
>
> avro::ValidSchema readerSchema;
> avro::ValidSchema writerSchema;
>
> avro::compileJsonSchema(readerStream, readerSchema);
> avro::compileJsonSchema(writerStream, writerSchema);
>
> avro::DecoderPtr decoder = avro::resolvingDecoder(writerSchema,
> readerSchema, avro::jsonDecoder(writerSchema));
> struct Outer outer;
>
> std::stringstream
> jsonStream("{\"outerAsField\":{\"extraArray\":[{\"first\":{\"field\":\"here
> is a string field\"},\"second\":{\"field\":\"here is another string
> field\"},\"innerArray\":[{\"number\":1},{\"number\":2},{\"number\":3}]}]}}");
> std::auto_ptr<avro::InputStream> input =
> avro::istreamInputStream(jsonStream);
> decoder->init(*input);
> avro::decode(*decoder, outer);
> }
> {code}
> The following will generate a proper object according to the reader schema
> and completely ignore the extraneous characters at the end of the stream.
> {code:title=Garbage after appended field of new record}
> {
> std::string
> readerString("{\"type\":\"record\",\"name\":\"InArray\",\"fields\":[{\"name\":\"first\",\"type\":{\"type\":\"record\",\"name\":\"Inner1\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"second\",\"type\":{\"type\":\"record\",\"name\":\"Inner2\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}}]}");
> std::string
> writerString("{\"type\":\"record\",\"name\":\"InArray\",\"fields\":[{\"name\":\"first\",\"type\":{\"type\":\"record\",\"name\":\"Inner1\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"second\",\"type\":{\"type\":\"record\",\"name\":\"Inner2\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"third\",\"type\":{\"type\":\"record\",\"name\":\"Inner3\",\"fields\":[{\"name\":\"number\",\"type\":\"int\"}]}}]}");
> std::stringstream readerStream(readerString);
> std::stringstream writerStream(writerString);
>
> avro::ValidSchema readerSchema;
> avro::ValidSchema writerSchema;
>
> avro::compileJsonSchema(readerStream, readerSchema);
> avro::compileJsonSchema(writerStream, writerSchema);
>
> avro::DecoderPtr decoder = avro::resolvingDecoder(writerSchema,
> readerSchema, avro::jsonDecoder(writerSchema));
> struct Outer outer;
>
> std::stringstream jsonStream("{\"first\":{\"field\":\"here is a string
> field\"},\"second\":{\"field\":\"here is another string
> field\"},\"third\":{\"number\":3} GARBAGE_HERE}");
> std::auto_ptr<avro::InputStream> input =
> avro::istreamInputStream(jsonStream);
> decoder->init(*input);
> avro::decode(*decoder, outer);
> }
> {code}
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)