[
https://issues.apache.org/jira/browse/AVRO-3001?focusedWorklogId=803103&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-803103
]
ASF GitHub Bot logged work on AVRO-3001:
----------------------------------------
Author: ASF GitHub Bot
Created on: 24/Aug/22 04:47
Start Date: 24/Aug/22 04:47
Worklog Time Spent: 10m
Work Description: rayokota commented on code in PR #1833:
URL: https://github.com/apache/avro/pull/1833#discussion_r953339960
##########
lang/csharp/src/apache/main/IO/Parsing/Symbol.cs:
##########
@@ -0,0 +1,1049 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.Linq;
+
+namespace Avro.IO.Parsing
+{
+ /// <summary>
+ /// Symbol is the base of all symbols (terminals and non-terminals) of the
+ /// grammar.
+ /// </summary>
+ public abstract class Symbol
+ {
+ /// <summary>
+ /// The type of symbol.
+ /// </summary>
+ public enum Kind
+ {
+ /// <summary>
+ /// terminal symbols which have no productions </summary>
+ Terminal,
+
+ /// <summary>
+ /// Start symbol for some grammar </summary>
+ Root,
+
+ /// <summary>
+ /// non-terminal symbol which is a sequence of one or more other
symbols </summary>
+ Sequence,
+
+ /// <summary>
+ /// non-terminal to represent the contents of an array or map
</summary>
+ Repeater,
+
+ /// <summary>
+ /// non-terminal to represent the union </summary>
+ Alternative,
+
+ /// <summary>
+ /// non-terminal action symbol which is automatically consumed
</summary>
+ ImplicitAction,
+
+ /// <summary>
+ /// non-terminal action symbol which is explicitly consumed
</summary>
+ ExplicitAction
+ }
+
+ /// The kind of this symbol.
+ public readonly Kind SymKind;
+
+ /// <summary>
+ /// The production for this symbol. If this symbol is a terminal this
is
+ /// <tt>null</tt>. Otherwise this holds the sequence of the
symbols that
+ /// forms the production for this symbol. The sequence is in the
reverse order of
+ /// production. This is useful for easy copying onto parsing stack.
+ ///
+ /// Please note that this field is readonly. So the production for a symbol
should be
+ /// known before that symbol is constructed. This requirement cannot
be met for
+ /// those symbols which are recursive (e.g. a record that holds a union,
a branch of
+ /// which is the record itself). To resolve this problem, we
initialize the
+ /// symbol with an array of nulls. Later we fill in the symbols. Not
clean, but it
+ /// works. The other option is to not make this field readonly. But
keeping it
+ /// readonly and thus keeping the symbol immutable gives some comfort. See
the various
+ /// generators for how we generate records.
+ /// </summary>
+ public readonly Symbol[] Production;
+
+ /// <summary>
+ /// Constructs a new symbol of the given kind.
+ /// </summary>
+ protected Symbol(Kind kind) : this(kind, null)
+ {
+ }
+
+ /// <summary>
+ /// Constructs a new symbol of the given kind and production.
+ /// </summary>
+ protected Symbol(Kind kind, Symbol[] production)
+ {
+ this.Production = production;
+ this.SymKind = kind;
+ }
+
+ /// <summary>
+ /// A convenience method to construct a root symbol.
+ /// </summary>
+ public static Symbol NewRoot(params Symbol[] symbols)
+ {
+ return new Root(symbols);
+ }
+
+ /// <summary>
+ /// A convenience method to construct a sequence.
+ /// </summary>
+ /// <param name="production"> The constituent symbols of the sequence.
</param>
+ public static Symbol NewSeq(params Symbol[] production)
+ {
+ return new Sequence(production);
+ }
+
+ /// <summary>
+ /// A convenience method to construct a repeater.
+ /// </summary>
+ /// <param name="endSymbol"> The end symbol. </param>
+ /// <param name="symsToRepeat"> The symbols to repeat in the repeater.
</param>
+ public static Symbol NewRepeat(Symbol endSymbol, params Symbol[]
symsToRepeat)
+ {
+ return new Repeater(endSymbol, symsToRepeat);
+ }
+
+ /// <summary>
+ /// A convenience method to construct a union.
+ /// </summary>
+ public static Symbol NewAlt(Symbol[] symbols, string[] labels)
+ {
+ return new Alternative(symbols, labels);
+ }
+
+ /// <summary>
+ /// A convenience method to construct an ErrorAction.
+ /// </summary>
+ /// <param name="e"> </param>
+ protected static Symbol Error(string e)
+ {
+ return new ErrorAction(e);
+ }
+
+ /// <summary>
+ /// A convenience method to construct a ResolvingAction.
+ /// </summary>
+ /// <param name="w"> The writer symbol </param>
+ /// <param name="r"> The reader symbol </param>
+ protected static Symbol Resolve(Symbol w, Symbol r)
+ {
+ return new ResolvingAction(w, r);
+ }
+
+ /// <summary>
+ /// Fixup symbol.
+ /// </summary>
+ protected class Fixup
+ {
+ private readonly Symbol[] symbols;
+
+ /// <summary>
+ /// The symbols.
+ /// </summary>
+ public Symbol[] Symbols
+ {
+ get { return (Symbol[])symbols.Clone(); }
+ }
+ /// <summary>
+ /// The position.
+ /// </summary>
+ public readonly int Pos;
+
+ /// <summary>
+ /// Initializes a new instance of the <see cref="Fixup"/> class.
+ /// </summary>
+ public Fixup(Symbol[] symbols, int pos)
+ {
+ this.symbols = (Symbol[])symbols.Clone();
+ this.Pos = pos;
+ }
+ }
+
+ /// <summary>
+ /// Flatten the given sub-array of symbols into a sub-array of symbols.
+ /// </summary>
+ protected virtual Symbol Flatten(IDictionary<Sequence, Sequence> map,
IDictionary<Sequence, IList<Fixup>> map2)
+ {
+ return this;
+ }
+
+ /// <summary>
+ /// Returns the flattened size.
+ /// </summary>
+ public virtual int FlattenedSize()
+ {
+ return 1;
+ }
+
+ /// <summary>
+ /// Flattens the given sub-array of symbols into a sub-array of
symbols. Every
+ /// <tt>Sequence</tt> in the input is replaced by its production
recursively.
+ /// If non-<tt>Sequence</tt> symbols internally hold other symbols,
those
+ /// internal symbols also get flattened. When flattening is done, the
only places
+ /// there might be Sequence symbols are in the productions of a
Repeater or
+ /// Alternative, or the symToParse and symToSkip in a
UnionAdjustAction or
+ /// SkipAction.
+ ///
+ /// Why is this done? We want our parsers to be fast. If we left the
grammars
+ /// unflattened, then the parser would be constantly copying the
contents of
+ /// nested Sequence productions onto the parsing stack. Instead,
because of
+ /// flattening, we have a long top-level production with no Sequences
unless the
+ /// Sequence is absolutely needed, e.g., in the case of a Repeater or
an
+ /// Alternative.
+ ///
+ /// Well, this is not exactly true when recursion is involved. Where
there is a
+ /// recursive record, that record will be "inlined" once, but any
internal (i.e.,
+ /// recursive) references to that record will be a Sequence for the
record. That
+ /// Sequence will not further inline itself
Issue Time Tracking
-------------------
Worklog Id: (was: 803103)
Time Spent: 4h 40m (was: 4.5h)
> JsonEncode Decode support for C#
> --------------------------------
>
> Key: AVRO-3001
> URL: https://issues.apache.org/jira/browse/AVRO-3001
> Project: Apache Avro
> Issue Type: Improvement
> Components: csharp
> Affects Versions: 1.10.0, 1.11.0
> Reporter: Krishnan Unni
> Assignee: Robert Yokota
> Priority: Major
> Labels: pull-request-available
> Time Spent: 4h 40m
> Remaining Estimate: 0h
>
> The C# library for avro currently supports only the Binary encoding and also
> with compile time types (Generic support only). As part of a project I am
> doing I need to validate the avro schema against the incoming json data on
> the fly without a predefined type (generated class). So basically comparing
> an avro schema (string/json representation) against a raw json string. It is
> possible with the Java library since it supports both non-generic types and
> streams as well as json encoding. With C# currently this is not possible. Is
> there a plan to extend the C# library to provide these features? If yes, is
> there a timeline? If not, is there any alternative to achieve this?
--
This message was sent by Atlassian Jira
(v8.20.10#820010)