[
https://issues.apache.org/jira/browse/AVRO-3001?focusedWorklogId=802317&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-802317
]
ASF GitHub Bot logged work on AVRO-3001:
----------------------------------------
Author: ASF GitHub Bot
Created on: 22/Aug/22 00:39
Start Date: 22/Aug/22 00:39
Worklog Time Spent: 10m
Work Description: github-code-scanning[bot] commented on code in PR #1833:
URL: https://github.com/apache/avro/pull/1833#discussion_r950928308
##########
lang/csharp/src/apache/test/IO/JsonCodecTests.cs:
##########
@@ -0,0 +1,226 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using NUnit.Framework;
+using System.IO;
+using System.Text;
+using Avro.Generic;
+using Avro.IO;
+using Newtonsoft.Json.Linq;
+
+namespace Avro.Test
+{
+ using Decoder = Avro.IO.Decoder;
+ using Encoder = Avro.IO.Encoder;
+
+ /// <summary>
+ /// Tests the JsonEncoder and JsonDecoder.
+ /// </summary>
+ [TestFixture]
+ public class JsonCodecTests
+ {
+ [TestCase]
+ public void TestJsonEncoderWhenIncludeNamespaceOptionIsFalse()
+ {
+ string value = "{\"b\": {\"string\":\"myVal\"}, \"a\": 1}";
+ string schemaStr = "{\"type\": \"record\", \"name\": \"ab\",
\"fields\": [" +
+ "{\"name\": \"a\", \"type\": \"int\"},
{\"name\": \"b\", \"type\": [\"null\", \"string\"]}" +
+ "]}";
+ Schema schema = Schema.Parse(schemaStr);
+ byte[] avroBytes = fromJsonToAvro(value, schema);
+
+
Assert.IsTrue(JToken.DeepEquals(JObject.Parse("{\"b\":\"myVal\",\"a\":1}"),
+ JObject.Parse(fromAvroToJson(avroBytes, schema, false))));
+ }
+
+ [TestCase]
+ public void TestJsonEncoderWhenIncludeNamespaceOptionIsTrue()
+ {
+ string value = "{\"b\": {\"string\":\"myVal\"}, \"a\": 1}";
+ string schemaStr = "{\"type\": \"record\", \"name\": \"ab\",
\"fields\": [" +
+ "{\"name\": \"a\", \"type\": \"int\"},
{\"name\": \"b\", \"type\": [\"null\", \"string\"]}" +
+ "]}";
+ Schema schema = Schema.Parse(schemaStr);
+ byte[] avroBytes = fromJsonToAvro(value, schema);
+
+
Assert.IsTrue(JToken.DeepEquals(JObject.Parse("{\"b\":{\"string\":\"myVal\"},\"a\":1}"),
+ JObject.Parse(fromAvroToJson(avroBytes, schema, true))));
+ }
+
+ [TestCase]
+ public void TestJsonRecordOrdering()
+ {
+ string value = "{\"b\": 2, \"a\": 1}";
+ Schema schema = Schema.Parse("{\"type\": \"record\", \"name\":
\"ab\", \"fields\": [" +
+ "{\"name\": \"a\", \"type\":
\"int\"}, {\"name\": \"b\", \"type\": \"int\"}" +
+ "]}");
+ GenericDatumReader<object> reader = new
GenericDatumReader<object>(schema, schema);
+ Decoder decoder = new JsonDecoder(schema, value);
+ object o = reader.Read(null, decoder);
+
+ Assert.AreEqual("{\"a\":1,\"b\":2}", fromDatumToJson(o, schema,
false));
+ }
+
+ [TestCase]
+ public void TestJsonRecordOrdering2()
+ {
+ string value = "{\"b\": { \"b3\": 1.4, \"b2\": 3.14, \"b1\":
\"h\"}, \"a\": {\"a2\":true, \"a1\": null}}";
+ Schema schema = Schema.Parse("{\"type\": \"record\", \"name\":
\"ab\", \"fields\": [\n" +
+ "{\"name\": \"a\", \"type\":
{\"type\":\"record\",\"name\":\"A\",\"fields\":\n" +
+ "[{\"name\":\"a1\",
\"type\":\"null\"}, {\"name\":\"a2\", \"type\":\"boolean\"}]}},\n" +
+ "{\"name\": \"b\", \"type\":
{\"type\":\"record\",\"name\":\"B\",\"fields\":\n" +
+ "[{\"name\":\"b1\",
\"type\":\"string\"}, {\"name\":\"b2\", \"type\":\"float\"}, {\"name\":\"b3\",
\"type\":\"double\"}]}}\n" +
+ "]}");
+ GenericDatumReader<object> reader = new
GenericDatumReader<object>(schema, schema);
+ Decoder decoder = new JsonDecoder(schema, value);
+ object o = reader.Read(null, decoder);
+
+
Assert.AreEqual("{\"a\":{\"a1\":null,\"a2\":true},\"b\":{\"b1\":\"h\",\"b2\":3.14,\"b3\":1.4}}",
+ fromDatumToJson(o, schema, false));
+ }
+
+ [TestCase]
+ public void TestJsonRecordOrderingWithProjection()
+ {
+ String value = "{\"b\": { \"b3\": 1.4, \"b2\": 3.14, \"b1\":
\"h\"}, \"a\": {\"a2\":true, \"a1\": null}}";
+ Schema writerSchema = Schema.Parse("{\"type\": \"record\",
\"name\": \"ab\", \"fields\": [\n"
+ + "{\"name\": \"a\", \"type\":
{\"type\":\"record\",\"name\":\"A\",\"fields\":\n"
+ + "[{\"name\":\"a1\",
\"type\":\"null\"}, {\"name\":\"a2\", \"type\":\"boolean\"}]}},\n"
+ + "{\"name\": \"b\", \"type\":
{\"type\":\"record\",\"name\":\"B\",\"fields\":\n"
+ + "[{\"name\":\"b1\",
\"type\":\"string\"}, {\"name\":\"b2\", \"type\":\"float\"}, {\"name\":\"b3\",
\"type\":\"double\"}]}}\n"
+ + "]}");
+ Schema readerSchema = Schema.Parse("{\"type\": \"record\",
\"name\": \"ab\", \"fields\": [\n"
+ + "{\"name\": \"a\", \"type\":
{\"type\":\"record\",\"name\":\"A\",\"fields\":\n"
+ + "[{\"name\":\"a1\",
\"type\":\"null\"}, {\"name\":\"a2\", \"type\":\"boolean\"}]}}\n" +
+ "]}");
+ GenericDatumReader<object> reader = new
GenericDatumReader<object>(writerSchema, readerSchema);
+ Decoder decoder = new JsonDecoder(writerSchema, value);
+ Object o = reader.Read(null, decoder);
+
+ Assert.AreEqual("{\"a\":{\"a1\":null,\"a2\":true}}",
+ fromDatumToJson(o, readerSchema, false));
+ }
+
+
+ [TestCase]
+ public void testJsonRecordOrderingWithProjection2()
+ {
+ String value =
+ "{\"b\": { \"b1\": \"h\", \"b2\": [3.14, 3.56], \"b3\": 1.4},
\"a\": {\"a2\":true, \"a1\": null}}";
+ Schema writerSchema = Schema.Parse("{\"type\": \"record\",
\"name\": \"ab\", \"fields\": [\n"
+ + "{\"name\": \"a\", \"type\":
{\"type\":\"record\",\"name\":\"A\",\"fields\":\n"
+ + "[{\"name\":\"a1\",
\"type\":\"null\"}, {\"name\":\"a2\", \"type\":\"boolean\"}]}},\n"
+ + "{\"name\": \"b\", \"type\":
{\"type\":\"record\",\"name\":\"B\",\"fields\":\n"
+ + "[{\"name\":\"b1\",
\"type\":\"string\"}, {\"name\":\"b2\", \"type\":{\"type\":\"array\",
\"items\":\"float\"}}, {\"name\":\"b3\", \"type\":\"double\"}]}}\n"
+ + "]}");
+
+ Schema readerSchema = Schema.Parse("{\"type\": \"record\",
\"name\": \"ab\", \"fields\": [\n"
+ + "{\"name\": \"a\", \"type\":
{\"type\":\"record\",\"name\":\"A\",\"fields\":\n"
+ + "[{\"name\":\"a1\",
\"type\":\"null\"}, {\"name\":\"a2\", \"type\":\"boolean\"}]}}\n" +
+ "]}");
+
+ GenericDatumReader<object> reader = new
GenericDatumReader<object>(writerSchema, readerSchema);
+ Decoder decoder = new JsonDecoder(writerSchema, value);
+ object o = reader.Read(null, decoder);
+
+ Assert.AreEqual("{\"a\":{\"a1\":null,\"a2\":true}}",
+ fromDatumToJson(o, readerSchema, false));
+ }
+
+ [TestCase("int", 1)]
+ [TestCase("long", 1L)]
+ [TestCase("float", 1.0F)]
+ [TestCase("double", 1.0)]
+ public void TestJsonDecoderNumeric(string type, object value)
+ {
+ string def = "{\"type\":\"record\",\"name\":\"X\",\"fields\":" +
"[{\"type\":\"" + type +
+ "\",\"name\":\"n\"}]}";
+ Schema schema = Schema.Parse(def);
+ DatumReader<GenericRecord> reader = new
GenericDatumReader<GenericRecord>(schema, schema);
+
+ string[] records = { "{\"n\":1}", "{\"n\":1.0}" };
+
+ foreach (string record in records)
+ {
+ Decoder decoder = new JsonDecoder(schema, record);
+ GenericRecord r = reader.Read(null, decoder);
+ Assert.AreEqual(value, r["n"]);
+ }
Review Comment:
## Missed opportunity to use Select
This foreach loop immediately maps its iteration variable to another
variable [here](1) - consider mapping the sequence explicitly using
'.Select(...)'.
[Show more
details](https://github.com/apache/avro/security/code-scanning/2897)
##########
lang/csharp/src/apache/main/IO/Parsing/Symbol.cs:
##########
@@ -0,0 +1,778 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.Linq;
+
+namespace Avro.IO.Parsing
+{
+ /// <summary>
+ /// Symbol is the base of all symbols (terminals and non-terminals) of the
+ /// grammar.
+ /// </summary>
+ public abstract class Symbol
+ {
+ /// <summary>
+ /// The type of symbol.
+ /// </summary>
+ public enum Kind
+ {
+ /// <summary>
+ /// terminal symbols which have no productions </summary>
+ Terminal,
+
+ /// <summary>
+ /// Start symbol for some grammar </summary>
+ Root,
+
+ /// <summary>
+ /// non-terminal symbol which is a sequence of one or more other
symbols </summary>
+ Sequence,
+
+ /// <summary>
+ /// non-terminal to represent the contents of an array or map
</summary>
+ Repeater,
+
+ /// <summary>
+ /// non-terminal to represent the union </summary>
+ Alternative,
+
+ /// <summary>
+ /// non-terminal action symbol which are automatically consumed
</summary>
+ ImplicitAction,
+
+ /// <summary>
+ /// non-terminal action symbol which is explicitly consumed
</summary>
+ ExplicitAction
+ }
+
+ /// The kind of this symbol.
+ public readonly Kind SymKind;
+
+ /// <summary>
+ /// The production for this symbol. If this symbol is a terminal this
is
+ /// <tt>null</tt>. Otherwise this holds the sequence of the
symbols that
+ /// forms the production for this symbol. The sequence is in the
reverse order of
+ /// production. This is useful for easy copying onto parsing stack.
+ ///
+ /// Please note that this is a final. So the production for a symbol
should be
+ /// known before that symbol is constructed. This requirement cannot
be met for
+ /// those symbols which are recursive (e.g. a record that holds union
a branch of
+ /// which is the record itself). To resolve this problem, we
initialize the
+ /// symbol with an array of nulls. Later we fill the symbols. Not
clean, but
+ /// works. The other option is to not have this field a final. But
keeping it
+ /// final and thus keeping symbol immutable gives some comfort. See
various
+ /// generators how we generate records.
+ /// </summary>
+ public readonly Symbol[] Production;
+
+ /// <summary>
+ /// Constructs a new symbol of the given kind.
+ /// </summary>
+ protected Symbol(Kind kind) : this(kind, null)
+ {
+ }
+
+ /// <summary>
+ /// Constructs a new symbol of the given kind and production.
+ /// </summary>
+ protected Symbol(Kind kind, Symbol[] production)
+ {
+ this.Production = production;
+ this.SymKind = kind;
+ }
+
+ /// <summary>
+ /// A convenience method to construct a root symbol.
+ /// </summary>
+ public static Symbol NewRoot(params Symbol[] symbols)
+ {
+ return new Root(symbols);
+ }
+
+ /// <summary>
+ /// A convenience method to construct a sequence.
+ /// </summary>
+ /// <param name="production"> The constituent symbols of the sequence.
</param>
+ public static Symbol NewSeq(params Symbol[] production)
+ {
+ return new Sequence(production);
+ }
+
+ /// <summary>
+ /// A convenience method to construct a repeater.
+ /// </summary>
+ /// <param name="endSymbol"> The end symbol. </param>
+ /// <param name="symsToRepeat"> The symbols to repeat in the repeater.
</param>
+ public static Symbol NewRepeat(Symbol endSymbol, params Symbol[]
symsToRepeat)
+ {
+ return new Repeater(endSymbol, symsToRepeat);
+ }
+
+ /// <summary>
+ /// A convenience method to construct a union.
+ /// </summary>
+ public static Symbol NewAlt(Symbol[] symbols, string[] labels)
+ {
+ return new Alternative(symbols, labels);
+ }
+
+ /// <summary>
+ /// A convenience method to construct an ErrorAction.
+ /// </summary>
+ /// <param name="e"> </param>
+ protected static Symbol Error(string e)
+ {
+ return new ErrorAction(e);
+ }
+
+ /// <summary>
+ /// A convenience method to construct a ResolvingAction.
+ /// </summary>
+ /// <param name="w"> The writer symbol </param>
+ /// <param name="r"> The reader symbol </param>
+ protected static Symbol Resolve(Symbol w, Symbol r)
+ {
+ return new ResolvingAction(w, r);
+ }
+
+ protected class Fixup
+ {
+ public readonly Symbol[] Symbols;
+ public readonly int Pos;
+
+ public Fixup(Symbol[] symbols, int pos)
+ {
+ this.Symbols = symbols;
+ this.Pos = pos;
+ }
+ }
+
+ protected virtual Symbol Flatten(IDictionary<Sequence, Sequence> map,
IDictionary<Sequence, IList<Fixup>> map2)
+ {
+ return this;
+ }
+
+ public virtual int FlattenedSize()
+ {
+ return 1;
+ }
+
+ /// <summary>
+ /// Flattens the given sub-array of symbols into a sub-array of
symbols. Every
+ /// <tt>Sequence</tt> in the input are replaced by its production
recursively.
+ /// Non-<tt>Sequence</tt> symbols, they internally have other symbols
those
+ /// internal symbols also get flattened. When flattening is done, the
only place
+ /// there might be Sequence symbols is in the productions of a
Repeater,
+ /// Alternative, or the symToParse and symToSkip in a
UnionAdjustAction or
+ /// SkipAction.
+ ///
+ /// Why is this done? We want our parsers to be fast. If we left the
grammars
+ /// unflattened, then the parser would be constantly copying the
contents of
+ /// nested Sequence productions onto the parsing stack. Instead,
because of
+ /// flattening, we have a long top-level production with no Sequences
unless the
+ /// Sequence is absolutely needed, e.g., in the case of a Repeater or
an
+ /// Alternative.
+ ///
+ /// Well, this is not exactly true when recursion is involved. Where
there is a
+ /// recursive record, that record will be "inlined" once, but any
internal (ie,
+ /// recursive) references to that record will be a Sequence for the
record. That
+ /// Sequence will not further inline itself
Issue Time Tracking
-------------------
Worklog Id: (was: 802317)
Time Spent: 20m (was: 10m)
> JSON Encode/Decode support for C#
> ---------------------------------
>
> Key: AVRO-3001
> URL: https://issues.apache.org/jira/browse/AVRO-3001
> Project: Apache Avro
> Issue Type: Improvement
> Components: csharp
> Affects Versions: 1.10.0, 1.11.0
> Reporter: Krishnan Unni
> Priority: Major
> Labels: pull-request-available
> Time Spent: 20m
> Remaining Estimate: 0h
>
> The C# library for Avro currently supports only the binary encoding, and only
> with compile-time types (Generic support only). As part of a project I am
> working on, I need to validate the Avro schema against the incoming JSON data
> on the fly, without a predefined type (generated class). So basically comparing
> an Avro schema (string/JSON representation) against a raw JSON string. This is
> possible with the Java library, since it supports both non-generic types and
> streams, as well as JSON encoding. With C# this is currently not possible. Is
> there a plan to extend the C# library to provide these features? If yes, is
> there a timeline? If not, is there any alternative way to achieve this?
--
This message was sent by Atlassian Jira
(v8.20.10#820010)