This is an automated email from the ASF dual-hosted git repository.

eerhardt pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 3e71ea0  ARROW-10719: [C#] ArrowStreamWriter doesn't write schema 
metadata
3e71ea0 is described below

commit 3e71ea0e3f20ae3b883882d61f394fdca9577eb1
Author: Eric Erhardt <[email protected]>
AuthorDate: Tue Dec 15 21:42:42 2020 -0600

    ARROW-10719: [C#] ArrowStreamWriter doesn't write schema metadata
    
    Add support for reading and writing Schema and Field metadata
    
    Also, reducing some allocations in a few spots.
    
    Tagging people who have contributed to the C# library in the past year for 
review. (Feel free to ignore if you don't have time.)
    
    @pgovind @suhsteve @chutchinson @Ulimo @mr-smidge @zgramana @HashidaTKS 
@nhustler
    
    cc @olgacarpenter
    
    Closes #8902 from eerhardt/Metadata
    
    Authored-by: Eric Erhardt <[email protected]>
    Signed-off-by: Eric Erhardt <[email protected]>
---
 csharp/src/Apache.Arrow/Field.Builder.cs           |  5 +-
 csharp/src/Apache.Arrow/Field.cs                   | 18 ++++++-
 csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs   | 60 ++++++++++++++++++----
 csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs   | 39 +++++++++-----
 csharp/src/Apache.Arrow/Schema.Builder.cs          |  8 +--
 csharp/src/Apache.Arrow/Schema.cs                  | 15 ++++++
 .../Apache.Arrow.Tests/ArrowStreamWriterTests.cs   | 36 ++++++++++++-
 csharp/test/Apache.Arrow.Tests/FieldComparer.cs    |  2 +-
 csharp/test/Apache.Arrow.Tests/SchemaComparer.cs   |  2 +-
 csharp/test/Apache.Arrow.Tests/TestData.cs         | 25 ++++-----
 10 files changed, 159 insertions(+), 51 deletions(-)

diff --git a/csharp/src/Apache.Arrow/Field.Builder.cs 
b/csharp/src/Apache.Arrow/Field.Builder.cs
index 6d820b6..1e7aa19 100644
--- a/csharp/src/Apache.Arrow/Field.Builder.cs
+++ b/csharp/src/Apache.Arrow/Field.Builder.cs
@@ -23,14 +23,13 @@ namespace Apache.Arrow
     {
         public class Builder
         {
-            private readonly Dictionary<string, string> _metadata;
+            private Dictionary<string, string> _metadata;
             private string _name;
             private IArrowType _type;
             private bool _nullable;
 
             public Builder()
             {
-                _metadata = new Dictionary<string, string>();
                 _name = string.Empty;
                 _type = NullType.Default;
                 _nullable = true;
@@ -66,6 +65,8 @@ namespace Apache.Arrow
                     throw new ArgumentNullException(nameof(key));
                 }
 
+                _metadata ??= new Dictionary<string, string>();
+
                 _metadata[key] = value;
                 return this;
             }
diff --git a/csharp/src/Apache.Arrow/Field.cs b/csharp/src/Apache.Arrow/Field.cs
index 33aefb7..6e507b6 100644
--- a/csharp/src/Apache.Arrow/Field.cs
+++ b/csharp/src/Apache.Arrow/Field.cs
@@ -15,6 +15,7 @@
 
 using System;
 using System.Collections.Generic;
+using System.Diagnostics;
 using System.Linq;
 using Apache.Arrow.Types;
 
@@ -34,6 +35,22 @@ namespace Apache.Arrow
 
         public Field(string name, IArrowType dataType, bool nullable,
             IEnumerable<KeyValuePair<string, string>> metadata = default)
+            : this(name, dataType, nullable)
+        {
+            Metadata = metadata?.ToDictionary(kv => kv.Key, kv => kv.Value);
+
+        }
+
+        internal Field(string name, IArrowType dataType, bool nullable,
+            IReadOnlyDictionary<string, string> metadata, bool copyCollections)
+            : this(name, dataType, nullable)
+        {
+            Debug.Assert(copyCollections == false, "This internal constructor 
is to not copy the collections.");
+
+            Metadata = metadata;
+        }
+
+        private Field(string name, IArrowType dataType, bool nullable)
         {
             if (string.IsNullOrWhiteSpace(name))
             {
@@ -43,7 +60,6 @@ namespace Apache.Arrow
             Name = name;
             DataType = dataType ?? NullType.Default;
             IsNullable = nullable;
-            Metadata = metadata?.ToDictionary(kv => kv.Key, kv => kv.Value);
         }
     }
 }
diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs 
b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
index b42724c..3ad6ed2 100644
--- a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
+++ b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
@@ -17,6 +17,7 @@ using System;
 using System.Buffers;
 using System.Buffers.Binary;
 using System.Collections.Generic;
+using System.Diagnostics;
 using System.IO;
 using System.Threading;
 using System.Threading.Tasks;
@@ -442,23 +443,28 @@ namespace Apache.Arrow.Ipc
 
         private protected Offset<Flatbuf.Schema> SerializeSchema(Schema schema)
         {
-            // TODO: Serialize schema metadata
+            // Build metadata
+            VectorOffset metadataVectorOffset = default;
+            if (schema.HasMetadata)
+            {
+                Offset<Flatbuf.KeyValue>[] metadataOffsets = 
GetMetadataOffsets(schema.Metadata);
+                metadataVectorOffset = 
Flatbuf.Schema.CreateCustomMetadataVector(Builder, metadataOffsets);
+            }
 
             // Build fields
-
             var fieldOffsets = new Offset<Flatbuf.Field>[schema.Fields.Count];
-
             for (int i = 0; i < fieldOffsets.Length; i++)
             {
                 Field field = schema.GetFieldByIndex(i);
                 StringOffset fieldNameOffset = 
Builder.CreateString(field.Name);
                 ArrowTypeFlatbufferBuilder.FieldType fieldType = 
_fieldTypeBuilder.BuildFieldType(field);
 
-                VectorOffset fieldChildrenVectorOffset = 
Builder.CreateVectorOfTables(GetChildrenFieldOffsets(field));
+                VectorOffset fieldChildrenVectorOffset = 
GetChildrenFieldOffset(field);
+                VectorOffset fieldMetadataVectorOffset = 
GetFieldMetadataOffset(field);
 
                 fieldOffsets[i] = Flatbuf.Field.CreateField(Builder,
                     fieldNameOffset, field.IsNullable, fieldType.Type, 
fieldType.Offset,
-                    default, fieldChildrenVectorOffset, default);
+                    default, fieldChildrenVectorOffset, 
fieldMetadataVectorOffset);
             }
 
             VectorOffset fieldsVectorOffset = 
Flatbuf.Schema.CreateFieldsVector(Builder, fieldOffsets);
@@ -468,14 +474,14 @@ namespace Apache.Arrow.Ipc
             Flatbuf.Endianness endianness = BitConverter.IsLittleEndian ? 
Flatbuf.Endianness.Little : Flatbuf.Endianness.Big;
 
             return Flatbuf.Schema.CreateSchema(
-                Builder, endianness, fieldsVectorOffset);
+                Builder, endianness, fieldsVectorOffset, metadataVectorOffset);
         }
 
-        private protected Offset<Flatbuf.Field>[] 
GetChildrenFieldOffsets(Field field)
+        private VectorOffset GetChildrenFieldOffset(Field field)
         {
             if (!(field.DataType is NestedType type))
             {
-                return System.Array.Empty<Offset<Flatbuf.Field>>();
+                return default;
             }
 
             int childrenCount = type.Fields.Count;
@@ -486,13 +492,45 @@ namespace Apache.Arrow.Ipc
                 Field childField = type.Fields[i];
                 StringOffset childFieldNameOffset = 
Builder.CreateString(childField.Name);
                 ArrowTypeFlatbufferBuilder.FieldType childFieldType = 
_fieldTypeBuilder.BuildFieldType(childField);
-                VectorOffset childFieldChildrenVectorOffset = 
Builder.CreateVectorOfTables(GetChildrenFieldOffsets(childField));
+
+                VectorOffset childFieldChildrenVectorOffset = 
GetChildrenFieldOffset(childField);
+                VectorOffset childFieldMetadataVectorOffset = 
GetFieldMetadataOffset(childField);
 
                 children[i] = Flatbuf.Field.CreateField(Builder,
                     childFieldNameOffset, childField.IsNullable, 
childFieldType.Type, childFieldType.Offset,
-                    default, childFieldChildrenVectorOffset, default);
+                    default, childFieldChildrenVectorOffset, 
childFieldMetadataVectorOffset);
+            }
+
+            return Builder.CreateVectorOfTables(children);
+        }
+
+        private VectorOffset GetFieldMetadataOffset(Field field)
+        {
+            if (!field.HasMetadata)
+            {
+                return default;
+            }
+
+            Offset<Flatbuf.KeyValue>[] metadataOffsets = 
GetMetadataOffsets(field.Metadata);
+            return Flatbuf.Field.CreateCustomMetadataVector(Builder, 
metadataOffsets);
+        }
+
+        private Offset<Flatbuf.KeyValue>[] 
GetMetadataOffsets(IReadOnlyDictionary<string, string> metadata)
+        {
+            Debug.Assert(metadata != null);
+            Debug.Assert(metadata.Count > 0);
+
+            Offset<Flatbuf.KeyValue>[] metadataOffsets = new 
Offset<Flatbuf.KeyValue>[metadata.Count];
+            int index = 0;
+            foreach (KeyValuePair<string, string> metadatum in metadata)
+            {
+                StringOffset keyOffset = Builder.CreateString(metadatum.Key);
+                StringOffset valueOffset = 
Builder.CreateString(metadatum.Value);
+
+                metadataOffsets[index++] = 
Flatbuf.KeyValue.CreateKeyValue(Builder, keyOffset, valueOffset);
             }
-            return children;
+
+            return metadataOffsets;
         }
 
         private Offset<Flatbuf.Schema> WriteSchema(Schema schema)
diff --git a/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs 
b/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs
index e86336a..b39e749 100644
--- a/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs
+++ b/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs
@@ -14,6 +14,7 @@
 // limitations under the License.
 
 using System;
+using System.Collections.Generic;
 using System.Diagnostics;
 using System.IO;
 
@@ -53,31 +54,43 @@ namespace Apache.Arrow.Ipc
 
         internal static Schema GetSchema(Flatbuf.Schema schema)
         {
-            var schemaBuilder = new Schema.Builder();
-
+            List<Field> fields = new List<Field>();
             for (int i = 0; i < schema.FieldsLength; i++)
             {
                 Flatbuf.Field field = schema.Fields(i).GetValueOrDefault();
 
-                schemaBuilder.Field(FieldFromFlatbuffer(field));
+                fields.Add(FieldFromFlatbuffer(field));
+            }
+
+            Dictionary<string, string> metadata = schema.CustomMetadataLength 
> 0 ? new Dictionary<string, string>() : null;
+            for (int i = 0; i < schema.CustomMetadataLength; i++)
+            {
+                Flatbuf.KeyValue keyValue = 
schema.CustomMetadata(i).GetValueOrDefault();
+
+                metadata[keyValue.Key] = keyValue.Value;
             }
 
-            return schemaBuilder.Build();
+            return new Schema(fields, metadata, copyCollections: false);
         }
 
         private static Field FieldFromFlatbuffer(Flatbuf.Field flatbufField)
         {
-            Field[] childFields = null;
-            if (flatbufField.ChildrenLength > 0)
+            Field[] childFields = flatbufField.ChildrenLength > 0 ? new 
Field[flatbufField.ChildrenLength] : null;
+            for (int i = 0; i < flatbufField.ChildrenLength; i++)
             {
-                childFields = new Field[flatbufField.ChildrenLength];
-                for (int i = 0; i < flatbufField.ChildrenLength; i++)
-                {
-                    Flatbuf.Field? childFlatbufField = 
flatbufField.Children(i);
-                    childFields[i] = 
FieldFromFlatbuffer(childFlatbufField.Value);
-                }
+                Flatbuf.Field? childFlatbufField = flatbufField.Children(i);
+                childFields[i] = FieldFromFlatbuffer(childFlatbufField.Value);
             }
-            return new Field(flatbufField.Name, 
GetFieldArrowType(flatbufField, childFields), flatbufField.Nullable);
+
+            Dictionary<string, string> metadata = 
flatbufField.CustomMetadataLength > 0 ? new Dictionary<string, string>() : null;
+            for (int i = 0; i < flatbufField.CustomMetadataLength; i++)
+            {
+                Flatbuf.KeyValue keyValue = 
flatbufField.CustomMetadata(i).GetValueOrDefault();
+
+                metadata[keyValue.Key] = keyValue.Value;
+            }
+
+            return new Field(flatbufField.Name, 
GetFieldArrowType(flatbufField, childFields), flatbufField.Nullable, metadata, 
copyCollections: false);
         }
 
         private static Types.IArrowType GetFieldArrowType(Flatbuf.Field field, 
Field[] childFields = null)
diff --git a/csharp/src/Apache.Arrow/Schema.Builder.cs 
b/csharp/src/Apache.Arrow/Schema.Builder.cs
index 277728b..89e9e3c 100644
--- a/csharp/src/Apache.Arrow/Schema.Builder.cs
+++ b/csharp/src/Apache.Arrow/Schema.Builder.cs
@@ -20,22 +20,20 @@ namespace Apache.Arrow
 {
     public partial class Schema
     {
-
         public class Builder
         {
             private readonly List<Field> _fields;
-            private readonly Dictionary<string, string> _metadata;
+            private Dictionary<string, string> _metadata;
 
             public Builder()
             {
                 _fields = new List<Field>();
-                _metadata = new Dictionary<string, string>();
             }
 
             public Builder Clear()
             {
                 _fields.Clear();
-                _metadata.Clear();
+                _metadata?.Clear();
                 return this;
             }
 
@@ -66,6 +64,8 @@ namespace Apache.Arrow
                     throw new ArgumentNullException(nameof(key));
                 }
 
+                _metadata ??= new Dictionary<string, string>();
+
                 _metadata[key] = value;
                 return this;
             }
diff --git a/csharp/src/Apache.Arrow/Schema.cs 
b/csharp/src/Apache.Arrow/Schema.cs
index f330fbf..59d1921 100644
--- a/csharp/src/Apache.Arrow/Schema.cs
+++ b/csharp/src/Apache.Arrow/Schema.cs
@@ -15,6 +15,7 @@
 
 using System;
 using System.Collections.Generic;
+using System.Diagnostics;
 using System.Linq;
 
 namespace Apache.Arrow
@@ -53,6 +54,20 @@ namespace Apache.Arrow
             Metadata = metadata?.ToDictionary(kv => kv.Key, kv => kv.Value);
         }
 
+        internal Schema(List<Field> fields, IReadOnlyDictionary<string, 
string> metadata, bool copyCollections)
+        {
+            Debug.Assert(fields != null);
+            Debug.Assert(copyCollections == false, "This internal constructor 
is to not copy the collections.");
+
+            _fields = fields;
+
+            _fieldsDictionary = fields.ToDictionary(
+                field => field.Name, field => field,
+                StringComparer.OrdinalIgnoreCase);
+
+            Metadata = metadata;
+        }
+
         public Field GetFieldByIndex(int i)
         {
             return _fields[i];
diff --git a/csharp/test/Apache.Arrow.Tests/ArrowStreamWriterTests.cs 
b/csharp/test/Apache.Arrow.Tests/ArrowStreamWriterTests.cs
index b65d735..44546f1 100644
--- a/csharp/test/Apache.Arrow.Tests/ArrowStreamWriterTests.cs
+++ b/csharp/test/Apache.Arrow.Tests/ArrowStreamWriterTests.cs
@@ -13,15 +13,16 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-using Apache.Arrow.Ipc;
-using Apache.Arrow.Types;
 using System;
 using System.Buffers.Binary;
+using System.Collections.Generic;
 using System.IO;
 using System.Linq;
 using System.Net;
 using System.Net.Sockets;
 using System.Threading.Tasks;
+using Apache.Arrow.Ipc;
+using Apache.Arrow.Types;
 using Xunit;
 
 namespace Apache.Arrow.Tests
@@ -462,5 +463,36 @@ namespace Apache.Arrow.Tests
                 Assert.Equal(0, endOfBuffer2);
             }
         }
+
+        [Fact]
+        public void WritesMetadataCorrectly()
+        {
+            Schema.Builder schemaBuilder = new Schema.Builder()
+                .Metadata("index", "1, 2, 3, 4, 5")
+                .Metadata("reverseIndex", "5, 4, 3, 2, 1")
+                .Field(f => f
+                    .Name("IntCol")
+                    .DataType(UInt32Type.Default)
+                    .Metadata("custom1", "false")
+                    .Metadata("custom2", "true"))
+                .Field(f => f
+                    .Name("StringCol")
+                    .DataType(StringType.Default)
+                    .Metadata("custom2", "false")
+                    .Metadata("custom3", "4"))
+                .Field(f => f
+                    .Name("StructCol")
+                    .DataType(new StructType(new[] {
+                        new Field("Inner1", FloatType.Default, nullable: 
false),
+                        new Field("Inner2", DoubleType.Default, nullable: 
true, new Dictionary<string, string>() { { "customInner", "1" }, { 
"customInner2", "3" } })
+                    }))
+                    .Metadata("custom4", "6.4")
+                    .Metadata("custom1", "true"));
+
+            var schema = schemaBuilder.Build();
+            RecordBatch originalBatch = 
TestData.CreateSampleRecordBatch(schema, length: 10);
+
+            TestRoundTripRecordBatch(originalBatch);
+        }
     }
 }
diff --git a/csharp/test/Apache.Arrow.Tests/FieldComparer.cs 
b/csharp/test/Apache.Arrow.Tests/FieldComparer.cs
index fac5e05..d7dcc39 100644
--- a/csharp/test/Apache.Arrow.Tests/FieldComparer.cs
+++ b/csharp/test/Apache.Arrow.Tests/FieldComparer.cs
@@ -18,7 +18,7 @@ using Xunit;
 
 namespace Apache.Arrow.Tests
 {
-    public class FieldComparer
+    public static class FieldComparer
     {
         public static void Compare(Field expected, Field actual)
         {
diff --git a/csharp/test/Apache.Arrow.Tests/SchemaComparer.cs 
b/csharp/test/Apache.Arrow.Tests/SchemaComparer.cs
index d1fc636..3546d5e 100644
--- a/csharp/test/Apache.Arrow.Tests/SchemaComparer.cs
+++ b/csharp/test/Apache.Arrow.Tests/SchemaComparer.cs
@@ -18,7 +18,7 @@ using Xunit;
 
 namespace Apache.Arrow.Tests
 {
-    public class SchemaComparer
+    public static class SchemaComparer
     {
         public static void Compare(Schema expected, Schema actual)
         {
diff --git a/csharp/test/Apache.Arrow.Tests/TestData.cs 
b/csharp/test/Apache.Arrow.Tests/TestData.cs
index 0066f49..5688c62 100644
--- a/csharp/test/Apache.Arrow.Tests/TestData.cs
+++ b/csharp/test/Apache.Arrow.Tests/TestData.cs
@@ -58,6 +58,11 @@ namespace Apache.Arrow.Tests
 
             Schema schema = builder.Build();
 
+            return CreateSampleRecordBatch(schema, length);
+        }
+
+        public static RecordBatch CreateSampleRecordBatch(Schema schema, int 
length)
+        {
             IEnumerable<IArrowArray> arrays = CreateArrays(schema, length);
 
             return new RecordBatch(schema, arrays, length);
@@ -200,27 +205,15 @@ namespace Apache.Arrow.Tests
                 valueBuilder.Append(0);
 
                 Array = builder.Build();
-
             }
 
             public void Visit(StructType type)
             {
-                StringArray.Builder stringBuilder = new StringArray.Builder();
-                for (int i = 0; i < Length; i++)
+                IArrowArray[] childArrays = new IArrowArray[type.Fields.Count];
+                for (int i = 0; i < childArrays.Length; i++)
                 {
-                    stringBuilder.Append(i.ToString());
+                    childArrays[i] = CreateArray(type.Fields[i], Length);
                 }
-                StringArray stringArray = stringBuilder.Build();
-                Int32Array.Builder intBuilder = new Int32Array.Builder();
-                for (int i = 0; i < Length; i++)
-                {
-                    intBuilder.Append(i);
-                }
-                Int32Array intArray = intBuilder.Build();
-
-                List<Array> arrays = new List<Array>();
-                arrays.Add(stringArray);
-                arrays.Add(intArray);
 
                 ArrowBuffer.BitmapBuilder nullBitmap = new 
ArrowBuffer.BitmapBuilder();
                 for (int i = 0; i < Length; i++)
@@ -228,7 +221,7 @@ namespace Apache.Arrow.Tests
                     nullBitmap.Append(true);
                 }
                 
-                Array = new StructArray(type, Length, arrays, 
nullBitmap.Build());
+                Array = new StructArray(type, Length, childArrays, 
nullBitmap.Build());
             }
 
             private void GenerateArray<T, TArray, 
TArrayBuilder>(IArrowArrayBuilder<T, TArray, TArrayBuilder> builder, Func<int, 
T> generator)

Reply via email to