This is an automated email from the ASF dual-hosted git repository.

curth pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new c87faf82a4 GH-36588: [C#] Support blank column names and enable more 
integration tests. (#39167)
c87faf82a4 is described below

commit c87faf82a410a71751fdf85391c0dca20088b7d6
Author: Curt Hagenlocher <[email protected]>
AuthorDate: Sun Dec 10 18:01:35 2023 -0800

    GH-36588: [C#] Support blank column names and enable more integration 
tests. (#39167)
    
    ### What changes are included in this PR?
    
    Allows field names to be blank (but not null).
    Enables schema metadata to be read from JSON integration tests.
    Enables integration tests for cases that are now working.
    
    ### Are these changes tested?
    
    Yes.
    
    * Closes: #36588
    
    Authored-by: Curt Hagenlocher <[email protected]>
    Signed-off-by: Curt Hagenlocher <[email protected]>
---
 csharp/src/Apache.Arrow/Field.Builder.cs           |  3 +-
 csharp/src/Apache.Arrow/Field.cs                   | 11 +++--
 csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs   |  9 ++--
 csharp/src/Apache.Arrow/Types/MapType.cs           | 48 ++++++++++++++++++++--
 .../test/Apache.Arrow.IntegrationTest/JsonFile.cs  |  6 +++
 .../CDataInterfacePythonTests.cs                   | 44 ++++++++++++++++++++
 dev/archery/archery/integration/datagen.py         |  6 +--
 7 files changed, 105 insertions(+), 22 deletions(-)

diff --git a/csharp/src/Apache.Arrow/Field.Builder.cs 
b/csharp/src/Apache.Arrow/Field.Builder.cs
index 1e7aa192e0..f7f33383e2 100644
--- a/csharp/src/Apache.Arrow/Field.Builder.cs
+++ b/csharp/src/Apache.Arrow/Field.Builder.cs
@@ -30,14 +30,13 @@ namespace Apache.Arrow
 
             public Builder()
             {
-                _name = string.Empty;
                 _type = NullType.Default;
                 _nullable = true;
             }
 
             public Builder Name(string value)
             {
-                if (string.IsNullOrWhiteSpace(value))
+                if (value == null)
                 {
                     throw new ArgumentNullException(nameof(value));
                 }
diff --git a/csharp/src/Apache.Arrow/Field.cs b/csharp/src/Apache.Arrow/Field.cs
index 4fddd1bc4e..ac3cafac93 100644
--- a/csharp/src/Apache.Arrow/Field.cs
+++ b/csharp/src/Apache.Arrow/Field.cs
@@ -35,24 +35,23 @@ namespace Apache.Arrow
 
         public Field(string name, IArrowType dataType, bool nullable,
             IEnumerable<KeyValuePair<string, string>> metadata = default)
-            : this(name, dataType, nullable, false)
+            : this(name, dataType, nullable)
         {
             Metadata = metadata?.ToDictionary(kv => kv.Key, kv => kv.Value);
-
         }
 
         internal Field(string name, IArrowType dataType, bool nullable,
-            IReadOnlyDictionary<string, string> metadata, bool 
copyCollections, bool allowBlankName)
-            : this(name, dataType, nullable, allowBlankName)
+            IReadOnlyDictionary<string, string> metadata, bool copyCollections)
+            : this(name, dataType, nullable)
         {
             Debug.Assert(copyCollections == false, "This internal constructor 
is to not copy the collections.");
 
             Metadata = metadata;
         }
 
-        private Field(string name, IArrowType dataType, bool nullable, bool 
allowBlankName)
+        private Field(string name, IArrowType dataType, bool nullable)
         {
-            if (name == null || (!allowBlankName && 
string.IsNullOrWhiteSpace(name)))
+            if (name == null)
             {
                 throw new ArgumentNullException(nameof(name));
             }
diff --git a/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs 
b/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs
index 3f504cf3b9..633554fc53 100644
--- a/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs
+++ b/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs
@@ -59,7 +59,7 @@ namespace Apache.Arrow.Ipc
             for (int i = 0; i < schema.FieldsLength; i++)
             {
                 Flatbuf.Field field = schema.Fields(i).GetValueOrDefault();
-                fields.Add(FieldFromFlatbuffer(field, ref dictionaryMemo, 
allowBlankName: false));
+                fields.Add(FieldFromFlatbuffer(field, ref dictionaryMemo));
             }
 
             Dictionary<string, string> metadata = schema.CustomMetadataLength 
> 0 ? new Dictionary<string, string>() : null;
@@ -73,14 +73,13 @@ namespace Apache.Arrow.Ipc
             return new Schema(fields, metadata, copyCollections: false);
         }
 
-        private static Field FieldFromFlatbuffer(Flatbuf.Field flatbufField, 
ref DictionaryMemo dictionaryMemo, bool allowBlankName)
+        private static Field FieldFromFlatbuffer(Flatbuf.Field flatbufField, 
ref DictionaryMemo dictionaryMemo)
         {
-            bool allowBlankNameChild = flatbufField.ChildrenLength == 1 && 
flatbufField.TypeType == Flatbuf.Type.FixedSizeList;
             Field[] childFields = flatbufField.ChildrenLength > 0 ? new 
Field[flatbufField.ChildrenLength] : null;
             for (int i = 0; i < flatbufField.ChildrenLength; i++)
             {
                 Flatbuf.Field? childFlatbufField = flatbufField.Children(i);
-                childFields[i] = FieldFromFlatbuffer(childFlatbufField.Value, 
ref dictionaryMemo, allowBlankNameChild);
+                childFields[i] = FieldFromFlatbuffer(childFlatbufField.Value, 
ref dictionaryMemo);
             }
 
             Flatbuf.DictionaryEncoding? dictionaryEncoding = 
flatbufField.Dictionary;
@@ -104,7 +103,7 @@ namespace Apache.Arrow.Ipc
                 metadata[keyValue.Key] = keyValue.Value;
             }
 
-            var arrowField = new Field(flatbufField.Name, type, 
flatbufField.Nullable, metadata, copyCollections: false, allowBlankName);
+            var arrowField = new Field(flatbufField.Name, type, 
flatbufField.Nullable, metadata, copyCollections: false);
 
             if (dictionaryEncoding.HasValue)
             {
diff --git a/csharp/src/Apache.Arrow/Types/MapType.cs 
b/csharp/src/Apache.Arrow/Types/MapType.cs
index 73112c815b..47e0be48f9 100644
--- a/csharp/src/Apache.Arrow/Types/MapType.cs
+++ b/csharp/src/Apache.Arrow/Types/MapType.cs
@@ -13,12 +13,17 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+using System;
 using System.Collections.Generic;
 
 namespace Apache.Arrow.Types
 {
     public sealed class MapType : NestedType // MapType = 
ListType(StructType("key", "value")) 
     {
+        private const string EntriesKey = "entries";
+        private const string KeyKey = "key";
+        private const string ValueKey = "value";
+
         public override ArrowTypeId TypeId => ArrowTypeId.Map;
         public override string Name => "map";
         public readonly bool KeySorted;
@@ -28,20 +33,23 @@ namespace Apache.Arrow.Types
         public Field ValueField => KeyValueType.Fields[1];
 
         public MapType(IArrowType key, IArrowType value, bool nullable = true, 
bool keySorted = false)
-            : this(new Field("key", key, false), new Field("value", value, 
nullable), keySorted)
+            : base(Entries(key, value, nullable))
         {
+            KeySorted = keySorted;
         }
 
         public MapType(Field key, Field value, bool keySorted = false)
-            : this(new StructType(new List<Field>() { key, value }), keySorted)
+            : base(Entries(key, value))
         {
+            KeySorted = keySorted;
         }
 
-        public MapType(StructType entries, bool keySorted = false) : this(new 
Field("entries", entries, false), keySorted)
+        public MapType(StructType entries, bool keySorted = false) : 
base(Entries(entries))
         {
+            KeySorted = keySorted;
         }
 
-        public MapType(Field entries, bool keySorted = false) : base(entries)
+        public MapType(Field entries, bool keySorted = false) : 
base(Entries(entries))
         {
             KeySorted = keySorted;
         }
@@ -54,5 +62,37 @@ namespace Apache.Arrow.Types
 
             return new MapType(Fields[0], keySorted: false);
         }
+
+        private static Field Entries(IArrowType key, IArrowType value, bool 
nullable) =>
+            new Field(EntriesKey, NewStruct(new Field(KeyKey, key, false), new 
Field(ValueKey, value, nullable)), false);
+
+        private static Field Entries(Field key, Field value) =>
+            new Field(EntriesKey, NewStruct(NamedField(KeyKey, key), 
NamedField(ValueKey, value)), false);
+
+        private static Field Entries(StructType entries)
+        {
+            return new Field(EntriesKey, Struct(entries), false);
+        }
+
+        private static StructType NewStruct(Field key, Field value) => new 
StructType(new[] { key, value });
+
+        private static StructType Struct(StructType entries)
+        {
+            Field key = NamedField(KeyKey, entries.Fields[0]);
+            Field value = NamedField(ValueKey, entries.Fields[1]);
+            return object.ReferenceEquals(key, entries.Fields[0]) && 
object.ReferenceEquals(value, entries.Fields[1]) ? entries : NewStruct(key, 
value);
+        }
+
+        private static Field Entries(Field entries)
+        {
+            StructType structType = (StructType)entries.DataType;
+            StructType adjustedStruct = Struct(structType);
+            return StringComparer.Ordinal.Equals(entries.Name, EntriesKey) && 
object.ReferenceEquals(structType, adjustedStruct) ? entries : new 
Field(EntriesKey, adjustedStruct, false);
+        }
+
+        private static Field NamedField(string name, Field field)
+        {
+            return StringComparer.Ordinal.Equals(name, field.Name) ? field : 
new Field(name, field.DataType, field.IsNullable, field.Metadata);
+        }
     }
 }
diff --git a/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs 
b/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs
index 51bcf6dd75..f3fe73588a 100644
--- a/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs
+++ b/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs
@@ -120,6 +120,12 @@ namespace Apache.Arrow.IntegrationTest
             {
                 builder.Field(f => CreateField(f, jsonSchema.Fields[i], 
dictionaryIndexes));
             }
+
+            if (jsonSchema.Metadata != null)
+            {
+                builder.Metadata(jsonSchema.Metadata);
+            }
+
             return builder.Build();
         }
 
diff --git a/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs 
b/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs
index a3b53a40db..83902d8d93 100644
--- a/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs
+++ b/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs
@@ -829,6 +829,50 @@ namespace Apache.Arrow.Tests
             CArrowArrayStream.Free(cArrayStream);
         }
 
+        [SkippableFact]
+        public unsafe void ImportRecordBatchFromBuffer()
+        {
+            using (Py.GIL())
+            {
+                dynamic pa = Py.Import("pyarrow");
+                dynamic batch = pa.record_batch(
+                    new PyList(new PyObject[]
+                    {
+                        pa.array(new long?[] { 1, 2, 3, null, 5 }),
+                        pa.array(new[] { "hello", "world", null, "foo", "bar" 
}),
+                        pa.array(new[] { 0.0, 1.4, 2.5, 3.6, 4.7 }),
+                        pa.array(new bool?[] { true, false, null, false, true 
}),
+                    }),
+                    new[] { null, "", "column", "column" });
+
+                dynamic sink = pa.BufferOutputStream();
+                dynamic writer = pa.ipc.new_stream(sink, batch.schema);
+                writer.write_batch(batch);
+                ((IDisposable)writer).Dispose();
+
+                dynamic buf = sink.getvalue();
+                // buf.address, buf.size
+
+                IntPtr address = (IntPtr)(long)buf.address;
+                int size = buf.size;
+                byte[] buffer = new byte[size];
+                byte* ptr = (byte*)address.ToPointer();
+                for (int i = 0; i < size; i++)
+                {
+                    buffer[i] = ptr[i];
+                }
+
+                using (ArrowStreamReader reader = new ArrowStreamReader(new 
ReadOnlyMemory<byte>(buffer)))
+                {
+                    RecordBatch batch2 = reader.ReadNextRecordBatch();
+                    Assert.Equal("None", batch2.Schema.FieldsList[0].Name);
+                    Assert.Equal("", batch2.Schema.FieldsList[1].Name);
+                    Assert.Equal("column", batch2.Schema.FieldsList[2].Name);
+                    Assert.Equal("column", batch2.Schema.FieldsList[3].Name);
+                }
+            }
+        }
+
         private static PyObject List(params int?[] values)
         {
             return new PyList(values.Select(i => i == null ? PyObject.None : 
new PyInt(i.Value)).ToArray());
diff --git a/dev/archery/archery/integration/datagen.py 
b/dev/archery/archery/integration/datagen.py
index eca1effff6..29b203ae13 100644
--- a/dev/archery/archery/integration/datagen.py
+++ b/dev/archery/archery/integration/datagen.py
@@ -1812,7 +1812,6 @@ def get_generated_json_files(tempdir=None):
         generate_map_case(),
 
         generate_non_canonical_map_case()
-        .skip_tester('C#')
         .skip_tester('Java')  # TODO(ARROW-8715)
         # Canonical map names are restored on import, so the schemas are 
unequal
         .skip_format(SKIP_C_SCHEMA, 'C++'),
@@ -1827,11 +1826,9 @@ def get_generated_json_files(tempdir=None):
 
         generate_unions_case(),
 
-        generate_custom_metadata_case()
-        .skip_tester('C#'),
+        generate_custom_metadata_case(),
 
         generate_duplicate_fieldnames_case()
-        .skip_tester('C#')
         .skip_tester('JS'),
 
         generate_dictionary_case(),
@@ -1856,7 +1853,6 @@ def get_generated_json_files(tempdir=None):
         .skip_tester('Rust'),
 
         generate_extension_case()
-        .skip_tester('C#')
         # TODO: ensure the extension is registered in the C++ entrypoint
         .skip_format(SKIP_C_SCHEMA, 'C++')
         .skip_format(SKIP_C_ARRAY, 'C++'),

Reply via email to