[
https://issues.apache.org/jira/browse/AVRO-3001?focusedWorklogId=803092&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-803092
]
ASF GitHub Bot logged work on AVRO-3001:
----------------------------------------
Author: ASF GitHub Bot
Created on: 24/Aug/22 04:21
Start Date: 24/Aug/22 04:21
Worklog Time Spent: 10m
Work Description: KalleOlaviNiemitalo commented on code in PR #1833:
URL: https://github.com/apache/avro/pull/1833#discussion_r953303802
##########
lang/csharp/src/apache/main/IO/Parsing/JsonGrammarGenerator.cs:
##########
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+
+namespace Avro.IO.Parsing
+{
+ /// <summary>
+ /// The class that generates a grammar suitable to parse Avro data in JSON
+ /// format.
+ /// </summary>
+ public class JsonGrammarGenerator : ValidatingGrammarGenerator
+ {
+ /// <summary>
+ /// Returns the non-terminal that is the start symbol for the grammar
for the
+ /// given schema <tt>sc</tt>.
Review Comment:
The parameter is named `schema`, not `sc`.
##########
lang/csharp/src/apache/main/IO/JsonEncoder.cs:
##########
@@ -0,0 +1,356 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using Avro.IO.Parsing;
+using System.Collections;
+using System.IO;
+using System.Text;
+using Newtonsoft.Json;
+
+namespace Avro.IO
+{
+ /// <summary>
+ /// An <see cref="Encoder"/> for Avro's JSON data encoding.
+ ///
+ /// JsonEncoder buffers output, and data may not appear on the output until
+ /// <see cref="Encoder.Flush()"/> is called.
+ ///
+ /// JsonEncoder is not thread-safe.
+ /// </summary>
+ public class JsonEncoder : ParsingEncoder, Parser.IActionHandler
+ {
+ private readonly Parser parser;
+ private JsonWriter writer;
+ private bool includeNamespace = true;
+
+ // Has anything been written into the collections?
+ private readonly BitArray isEmpty = new BitArray(64);
+
+ /// <summary>
+ /// Initializes a new instance of the <see cref="JsonEncoder"/> class.
+ /// </summary>
+ public JsonEncoder(Schema sc, Stream stream) : this(sc,
getJsonWriter(stream, false))
+ {
+ }
+
+ /// <summary>
+ /// Initializes a new instance of the <see cref="JsonEncoder"/> class.
+ /// </summary>
+ public JsonEncoder(Schema sc, Stream stream, bool pretty) : this(sc,
getJsonWriter(stream, pretty))
+ {
+ }
+
+ /// <summary>
+ /// Initializes a new instance of the <see cref="JsonEncoder"/> class.
+ /// </summary>
+ public JsonEncoder(Schema sc, JsonWriter writer)
+ {
+ Configure(writer);
+ this.parser = new Parser((new
JsonGrammarGenerator()).Generate(sc), this);
+ }
+
+ /// <inheritdoc />
+ public override void Flush()
+ {
+ parser.ProcessImplicitActions();
+ if (writer != null)
+ {
+ writer.Flush();
+ }
+ }
+
+ // by default, one object per line.
+ // with pretty option use default pretty printer with root line
separator.
+ private static JsonWriter getJsonWriter(Stream stream, bool pretty)
+ {
+ JsonWriter writer = new JsonTextWriter(new StreamWriter(stream));
+ if (pretty)
+ {
+ writer.Formatting = Formatting.Indented;
+ }
+
+ return writer;
+ }
+
+ /// <summary>
+ /// Whether to include the namespace.
+ /// </summary>
+ public virtual bool IncludeNamespace
Review Comment:
Could you please change this documentation to say what the property actually
does, if it is not related to the namespaces used in the schema?
##########
lang/csharp/src/apache/main/IO/Parsing/ValidatingGrammarGenerator.cs:
##########
@@ -0,0 +1,171 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using Avro.Generic;
+
+namespace Avro.IO.Parsing
+{
+ /// <summary>
+ /// The class that generates validating grammar.
+ /// </summary>
+ public class ValidatingGrammarGenerator
+ {
+ /// <summary>
+ /// Returns the non-terminal that is the start symbol for the grammar
for the
+ /// given schema <tt>sc</tt>.
+ /// </summary>
+ public virtual Symbol Generate(Schema schema)
+ {
+ return Symbol.NewRoot(Generate(schema, new Dictionary<LitS,
Symbol>()));
+ }
+
+ /// <summary>
+ /// Returns the non-terminal that is the start symbol for the grammar
for the
+ /// given schema <tt>sc</tt>. If there is already an entry for the
given schema
+ /// in the given map <tt>seen</tt> then that entry is returned.
Otherwise a new
+ /// symbol is generated and an entry is inserted into the map.
+ /// </summary>
+ /// <param name="sc"> The schema for which the start symbol is
required </param>
+ /// <param name="seen"> A map of schema to symbol mapping done so far.
</param>
+ /// <returns> The start symbol for the schema </returns>
+ protected virtual Symbol Generate(Schema sc, IDictionary<LitS, Symbol>
seen)
+ {
+ switch (sc.Tag)
+ {
+ case Schema.Type.Null:
+ return Symbol.Null;
+ case Schema.Type.Boolean:
+ return Symbol.Boolean;
+ case Schema.Type.Int:
+ return Symbol.Int;
+ case Schema.Type.Long:
+ return Symbol.Long;
+ case Schema.Type.Float:
+ return Symbol.Float;
+ case Schema.Type.Double:
+ return Symbol.Double;
+ case Schema.Type.String:
+ return Symbol.String;
+ case Schema.Type.Bytes:
+ return Symbol.Bytes;
+ case Schema.Type.Fixed:
+ return Symbol.NewSeq(new
Symbol.IntCheckAction(((FixedSchema)sc).Size), Symbol.Fixed);
+ case Schema.Type.Enumeration:
+ return Symbol.NewSeq(new
Symbol.IntCheckAction(((EnumSchema)sc).Symbols.Count), Symbol.Enum);
Review Comment:
I guess this is where EnumAdjustAction would be used, if anywhere.
##########
lang/csharp/src/apache/main/IO/Parsing/Symbol.cs:
##########
@@ -0,0 +1,1049 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.Linq;
+
+namespace Avro.IO.Parsing
+{
+ /// <summary>
+ /// Symbol is the base of all symbols (terminals and non-terminals) of the
+ /// grammar.
+ /// </summary>
+ public abstract class Symbol
+ {
+ /// <summary>
+ /// The type of symbol.
+ /// </summary>
+ public enum Kind
+ {
+ /// <summary>
+ /// terminal symbols which have no productions </summary>
+ Terminal,
+
+ /// <summary>
+ /// Start symbol for some grammar </summary>
+ Root,
+
+ /// <summary>
+ /// non-terminal symbol which is a sequence of one or more other
symbols </summary>
+ Sequence,
+
+ /// <summary>
+ /// non-terminal to represent the contents of an array or map
</summary>
+ Repeater,
+
+ /// <summary>
+ /// non-terminal to represent the union </summary>
+ Alternative,
+
+ /// <summary>
+ /// non-terminal action symbol which are automatically consumed
</summary>
+ ImplicitAction,
+
+ /// <summary>
+ /// non-terminal action symbol which is explicitly consumed
</summary>
+ ExplicitAction
+ }
+
+ /// The kind of this symbol.
+ public readonly Kind SymKind;
+
+ /// <summary>
+ /// The production for this symbol. If this symbol is a terminal this
is
+ /// <tt>null</tt>. Otherwise this holds the sequence of the
symbols that
+ /// forms the production for this symbol. The sequence is in the
reverse order of
+ /// production. This is useful for easy copying onto parsing stack.
+ ///
+ /// Please note that this is a final. So the production for a symbol
should be
+ /// known before that symbol is constructed. This requirement cannot
be met for
+ /// those symbols which are recursive (e.g. a record that holds union
a branch of
+ /// which is the record itself). To resolve this problem, we
initialize the
+ /// symbol with an array of nulls. Later we fill the symbols. Not
clean, but
+ /// works. The other option is to not have this field a final. But
keeping it
+ /// final and thus keeping symbol immutable gives some comfort. See
various
+ /// generators how we generate records.
+ /// </summary>
+ public readonly Symbol[] Production;
+
+ /// <summary>
+ /// Constructs a new symbol of the given kind.
+ /// </summary>
+ protected Symbol(Kind kind) : this(kind, null)
+ {
+ }
+
+ /// <summary>
+ /// Constructs a new symbol of the given kind and production.
+ /// </summary>
+ protected Symbol(Kind kind, Symbol[] production)
+ {
+ this.Production = production;
+ this.SymKind = kind;
+ }
+
+ /// <summary>
+ /// A convenience method to construct a root symbol.
+ /// </summary>
+ public static Symbol NewRoot(params Symbol[] symbols)
+ {
+ return new Root(symbols);
+ }
+
+ /// <summary>
+ /// A convenience method to construct a sequence.
+ /// </summary>
+ /// <param name="production"> The constituent symbols of the sequence.
</param>
+ public static Symbol NewSeq(params Symbol[] production)
+ {
+ return new Sequence(production);
+ }
+
+ /// <summary>
+ /// A convenience method to construct a repeater.
+ /// </summary>
+ /// <param name="endSymbol"> The end symbol. </param>
+ /// <param name="symsToRepeat"> The symbols to repeat in the repeater.
</param>
+ public static Symbol NewRepeat(Symbol endSymbol, params Symbol[]
symsToRepeat)
+ {
+ return new Repeater(endSymbol, symsToRepeat);
+ }
+
+ /// <summary>
+ /// A convenience method to construct a union.
+ /// </summary>
+ public static Symbol NewAlt(Symbol[] symbols, string[] labels)
+ {
+ return new Alternative(symbols, labels);
+ }
+
+ /// <summary>
+ /// A convenience method to construct an ErrorAction.
+ /// </summary>
+ /// <param name="e"> </param>
+ protected static Symbol Error(string e)
+ {
+ return new ErrorAction(e);
+ }
+
+ /// <summary>
+ /// A convenience method to construct a ResolvingAction.
+ /// </summary>
+ /// <param name="w"> The writer symbol </param>
+ /// <param name="r"> The reader symbol </param>
+ protected static Symbol Resolve(Symbol w, Symbol r)
+ {
+ return new ResolvingAction(w, r);
+ }
+
+ /// <summary>
+ /// Fixup symbol.
+ /// </summary>
+ protected class Fixup
+ {
+ private readonly Symbol[] symbols;
+
+ /// <summary>
+ /// The symbols.
+ /// </summary>
+ public Symbol[] Symbols
+ {
+ get { return (Symbol[])symbols.Clone(); }
+ }
+ /// <summary>
+ /// The position.
+ /// </summary>
+ public readonly int Pos;
+
+ /// <summary>
+ /// Initializes a new instance of the <see cref="Fixup"/> class.
+ /// </summary>
+ public Fixup(Symbol[] symbols, int pos)
+ {
+ this.symbols = (Symbol[])symbols.Clone();
+ this.Pos = pos;
+ }
+ }
+
+ /// <summary>
+ /// Flatten the given sub-array of symbols into a sub-array of symbols.
+ /// </summary>
+ protected virtual Symbol Flatten(IDictionary<Sequence, Sequence> map,
IDictionary<Sequence, IList<Fixup>> map2)
+ {
+ return this;
+ }
+
+ /// <summary>
+ /// Returns the flattened size.
+ /// </summary>
+ public virtual int FlattenedSize()
+ {
+ return 1;
+ }
+
+ /// <summary>
+ /// Flattens the given sub-array of symbols into a sub-array of
symbols. Every
+ /// <tt>Sequence</tt> in the input is replaced by its production
recursively.
+ /// For non-<tt>Sequence</tt> symbols that internally have other symbols,
those
+ /// internal symbols also get flattened. When flattening is done, the
only place
+ /// there might be Sequence symbols is in the productions of a
Repeater,
+ /// Alternative, or the symToParse and symToSkip in a
UnionAdjustAction or
+ /// SkipAction.
+ ///
+ /// Why is this done? We want our parsers to be fast. If we left the
grammars
+ /// unflattened, then the parser would be constantly copying the
contents of
+ /// nested Sequence productions onto the parsing stack. Instead,
because of
+ /// flattening, we have a long top-level production with no Sequences
unless the
+ /// Sequence is absolutely needed, e.g., in the case of a Repeater or
an
+ /// Alternative.
+ ///
+ /// Well, this is not exactly true when recursion is involved. Where
there is a
+ /// recursive record, that record will be "inlined" once, but any
internal (ie,
+ /// recursive) references to that record will be a Sequence for the
record. That
+ /// Sequence will not further inline itself
Issue Time Tracking
-------------------
Worklog Id: (was: 803092)
Time Spent: 4h 10m (was: 4h)
> JsonEncode Decode support for C#
> --------------------------------
>
> Key: AVRO-3001
> URL: https://issues.apache.org/jira/browse/AVRO-3001
> Project: Apache Avro
> Issue Type: Improvement
> Components: csharp
> Affects Versions: 1.10.0, 1.11.0
> Reporter: Krishnan Unni
> Assignee: Robert Yokota
> Priority: Major
> Labels: pull-request-available
> Time Spent: 4h 10m
> Remaining Estimate: 0h
>
> The C# library for Avro currently supports only the binary encoding, and only
> with compile-time types (generic support only). As part of a project I am
> doing, I need to validate incoming JSON data against an Avro schema on
> the fly without a predefined type (generated class), i.e. by comparing
> an Avro schema (string/JSON representation) against a raw JSON string. This is
> possible with the Java library, since it supports both non-generic types and
> streams as well as JSON encoding. With C# this is currently not possible. Is
> there a plan to extend the C# library to provide these features? If yes, is
> there a timeline? If not, is there any alternative way to achieve this?
--
This message was sent by Atlassian Jira
(v8.20.10#820010)