[ 
https://issues.apache.org/jira/browse/AVRO-3001?focusedWorklogId=804015&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-804015
 ]

ASF GitHub Bot logged work on AVRO-3001:
----------------------------------------

                Author: ASF GitHub Bot
            Created on: 26/Aug/22 17:05
            Start Date: 26/Aug/22 17:05
    Worklog Time Spent: 10m 
      Work Description: KyleSchoonover commented on code in PR #1833:
URL: https://github.com/apache/avro/pull/1833#discussion_r956225566


##########
lang/csharp/src/apache/main/IO/Parsing/Symbol.cs:
##########
@@ -0,0 +1,983 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.Linq;
+
+namespace Avro.IO.Parsing
+{
+    /// <summary>
+    /// Symbol is the base of all symbols (terminals and non-terminals) of the
+    /// grammar.
+    /// </summary>
+    public abstract class Symbol
+    {
+        /// <summary>
+        /// The type of symbol.
+        /// </summary>
+        public enum Kind
+        {
+            /// <summary>
+            /// terminal symbols which have no productions </summary>
+            Terminal,
+
+            /// <summary>
+            /// Start symbol for some grammar </summary>
+            Root,
+
+            /// <summary>
+            /// non-terminal symbol which is a sequence of one or more other 
symbols </summary>
+            Sequence,
+
+            /// <summary>
+            /// non-terminal to represent the contents of an array or map 
</summary>
+            Repeater,
+
+            /// <summary>
+            /// non-terminal to represent the union </summary>
+            Alternative,
+
+            /// <summary>
+            /// non-terminal action symbol which is automatically consumed 
</summary>
+            ImplicitAction,
+
+            /// <summary>
+            /// non-terminal action symbol which is explicitly consumed 
</summary>
+            ExplicitAction
+        }
+
+        /// <summary>The kind of this symbol.</summary>
+        public Kind SymKind { get; private set; }
+
+        /// <summary>
+        /// The production for this symbol. If this symbol is a terminal this 
is
+        /// <tt>null</tt>. Otherwise this holds the sequence of the 
symbols that
+        /// forms the production for this symbol. The sequence is in the 
reverse order of
+        /// production. This is useful for easy copying onto parsing stack.
+        ///
+        /// Please note that this is a final. So the production for a symbol 
should be
+        /// known before that symbol is constructed. This requirement cannot 
be met for
+        /// those symbols which are recursive (e.g. a record that holds a union, 
a branch of
+        /// which is the record itself). To resolve this problem, we 
initialize the
+        /// symbol with an array of nulls. Later we fill the symbols. Not 
clean, but
+        /// works. The other option is to not have this field a final. But 
keeping it
+        /// final and thus keeping symbol immutable gives some comfort. See 
various
+        /// generators how we generate records.
+        /// </summary>
+        public Symbol[] Production { get; private set; }
+
+        /// <summary>
+        /// Constructs a new symbol of the given kind.
+        /// </summary>
+        protected Symbol(Kind kind) : this(kind, null)
+        {
+        }
+
+        /// <summary>
+        /// Constructs a new symbol of the given kind and production.
+        /// </summary>
+        protected Symbol(Kind kind, Symbol[] production)
+        {
+            Production = production;
+            SymKind = kind;
+        }
+
+        /// <summary>
+        /// A convenience method to construct a root symbol.
+        /// </summary>
+        public static Symbol NewRoot(params Symbol[] symbols) => new 
Root(symbols);
+
+        /// <summary>
+        /// A convenience method to construct a sequence.
+        /// </summary>
+        /// <param name="production"> The constituent symbols of the sequence. 
</param>
+        public static Symbol NewSeq(params Symbol[] production) => new 
Sequence(production);
+
+        /// <summary>
+        /// A convenience method to construct a repeater.
+        /// </summary>
+        /// <param name="endSymbol"> The end symbol. </param>
+        /// <param name="symsToRepeat"> The symbols to repeat in the repeater. 
</param>
+        public static Symbol NewRepeat(Symbol endSymbol, params Symbol[] 
symsToRepeat) =>
+            new Repeater(endSymbol, symsToRepeat);
+
+        /// <summary>
+        /// A convenience method to construct a union.
+        /// </summary>
+        public static Symbol NewAlt(Symbol[] symbols, string[] labels) => new 
Alternative(symbols, labels);
+
+        /// <summary>
+        /// A convenience method to construct an ErrorAction.
+        /// </summary>
+        /// <param name="e"> </param>
+        protected static Symbol Error(string e) => new ErrorAction(e);
+
+        /// <summary>
+        /// A convenience method to construct a ResolvingAction.
+        /// </summary>
+        /// <param name="w"> The writer symbol </param>
+        /// <param name="r"> The reader symbol </param>
+        protected static Symbol Resolve(Symbol w, Symbol r) => new 
ResolvingAction(w, r);
+
+        /// <summary>
+        /// Fixup symbol.
+        /// </summary>
+        protected class Fixup
+        {
+            private readonly Symbol[] symbols;
+
+            /// <summary>
+            /// The symbols.
+            /// </summary>
+            public Symbol[] Symbols
+            {
+                get { return (Symbol[])symbols.Clone(); }
+            }
+
+            /// <summary>
+            /// The position.
+            /// </summary>
+            public int Pos { get; private set; }
+
+            /// <summary>
+            /// Initializes a new instance of the <see cref="Fixup"/> class.
+            /// </summary>
+            public Fixup(Symbol[] symbols, int pos)
+            {
+                this.symbols = (Symbol[])symbols.Clone();
+                Pos = pos;
+            }
+        }
+
+        /// <summary>
+        /// Flatten the given sub-array of symbols into a sub-array of symbols.
+        /// </summary>
+        protected virtual Symbol Flatten(IDictionary<Sequence, Sequence> map, 
IDictionary<Sequence, IList<Fixup>> map2) => this;
+
+        /// <summary>
+        /// Returns the flattened size.
+        /// </summary>
+        public virtual int FlattenedSize() => 1;
+
+        /// <summary>
+        /// Flattens the given sub-array of symbols into a sub-array of 
symbols. Every
+        /// <tt>Sequence</tt> in the input is replaced by its production 
recursively.
+        /// For non-<tt>Sequence</tt> symbols that internally hold other symbols, 
those
+        /// internal symbols also get flattened. When flattening is done, the 
only place
+        /// there might be Sequence symbols is in the productions of a 
Repeater,
+        /// Alternative, or the symToParse and symToSkip in a 
UnionAdjustAction or
+        /// SkipAction.
+        ///
+        /// Why is this done? We want our parsers to be fast. If we left the 
grammars
+        /// unflattened, then the parser would be constantly copying the 
contents of
+        /// nested Sequence productions onto the parsing stack. Instead, 
because of
+        /// flattening, we have a long top-level production with no Sequences 
unless the
+        /// Sequence is absolutely needed, e.g., in the case of a Repeater or 
an
+        /// Alternative.
+        ///
+        /// Well, this is not exactly true when recursion is involved. Where 
there is a
+        /// recursive record, that record will be "inlined" once, but any 
internal (ie,
+        /// recursive) references to that record will be a Sequence for the 
record. That
+        /// Sequence will not further inline itself 

Issue Time Tracking
-------------------

    Worklog Id:     (was: 804015)
    Time Spent: 8.5h  (was: 8h 20m)

> JsonEncode Decode support for C#
> --------------------------------
>
>                 Key: AVRO-3001
>                 URL: https://issues.apache.org/jira/browse/AVRO-3001
>             Project: Apache Avro
>          Issue Type: Improvement
>          Components: csharp
>    Affects Versions: 1.10.0, 1.11.0
>            Reporter: Krishnan Unni
>            Assignee: Robert Yokota
>            Priority: Major
>              Labels: pull-request-available
>             Fix For: 1.12.0
>
>          Time Spent: 8.5h
>  Remaining Estimate: 0h
>
> The C# library for avro currently supports only the Binary encoding and also 
> with compile time types (Generic support only). As part of a project I am 
> doing I need to validate the avro schema against the incoming json data on 
> the fly without a predefined type (generated class). So basically comparing 
> an avro schema (string/json representation) against a raw json string. It is 
> possible with the Java library since it supports both non generic types and 
> streams as well as json encoding. With C# currently this is not possible. Is 
> there a plan to extend the C# library to provide these features? If yes, is 
> there a timeline? If not is there any alternative to achieve this? 



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to