This is an automated email from the ASF dual-hosted git repository.

curth pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-dotnet.git


The following commit(s) were added to refs/heads/main by this push:
     new fc49cf3  GH-301: [C#] Use an index lookup for O(1) field index access (#300)
fc49cf3 is described below

commit fc49cf334a5507829ac46e62de6c93022a32ca22
Author: Vasilis Themelis <[email protected]>
AuthorDate: Tue Mar 31 01:49:21 2026 +0100

    GH-301: [C#] Use an index lookup for O(1) field index access (#300)
    
    Closes #301.
    
    Ports the optimization from the closed PR at
    https://github.com/apache/arrow/pull/44633 into the new .NET-specific
    repository.
    
    The original PR was closed on November 18, 2025 with the note that the
    C# implementation had moved to a new repository.
    
    This version keeps the current `arrow-dotnet` behavior intact:
    - `GetFieldIndex(..., comparer: null)` and the default path now use a
    cached `CurrentCulture` index lookup for the common case.
    - Missing fields still return `-1`.
    - Duplicate field names still return the first match.
    - Non-default comparers still fall back to the existing linear scan.
    
    I also added dedicated schema tests covering:
    - `null`, `Ordinal`, `OrdinalIgnoreCase`, and `CurrentCulture` comparers
    - duplicate-name lookup returning the first match
    - missing-name behavior for each comparer
    
    Local verification:
    - `dotnet build test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj`
    - `DOTNET_ROLL_FORWARD=Major DOTNET_ROLL_FORWARD_TO_PRERELEASE=1 dotnet
    test test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj --no-restore
    --logger 'console;verbosity=minimal'`
---
 src/Apache.Arrow/Schema.cs                    | 23 +++++-----
 test/Apache.Arrow.Tests/SchemaBuilderTests.cs | 11 +++++
 test/Apache.Arrow.Tests/SchemaTests.cs        | 65 +++++++++++++++++++++++++++
 3 files changed, 88 insertions(+), 11 deletions(-)

diff --git a/src/Apache.Arrow/Schema.cs b/src/Apache.Arrow/Schema.cs
index 04e0e83..528adb9 100644
--- a/src/Apache.Arrow/Schema.cs
+++ b/src/Apache.Arrow/Schema.cs
@@ -31,6 +31,7 @@ namespace Apache.Arrow
         private readonly List<Field> _fieldsList;
 
         public ILookup<string, Field> FieldsLookup { get; }
+        private readonly ILookup<string, int> _fieldsIndexLookup;
 
         public IReadOnlyDictionary<string, string> Metadata { get; }
 
@@ -43,17 +44,11 @@ namespace Apache.Arrow
         public Schema(
             IEnumerable<Field> fields,
             IEnumerable<KeyValuePair<string, string>> metadata)
+            : this(
+                fields?.ToList() ?? throw new ArgumentNullException(nameof(fields)),
+                metadata?.ToDictionary(kv => kv.Key, kv => kv.Value),
+                false)
         {
-            if (fields is null)
-            {
-                throw new ArgumentNullException(nameof(fields));
-            }
-
-            _fieldsList = fields.ToList();
-            FieldsLookup = _fieldsList.ToLookup(f => f.Name);
-            _fieldsDictionary = FieldsLookup.ToDictionary(g => g.Key, g => g.First());
-
-            Metadata = metadata?.ToDictionary(kv => kv.Key, kv => kv.Value);
         }
 
         internal Schema(List<Field> fieldsList, IReadOnlyDictionary<string, string> metadata, bool copyCollections)
@@ -64,6 +59,9 @@ namespace Apache.Arrow
             _fieldsList = fieldsList;
             FieldsLookup = _fieldsList.ToLookup(f => f.Name);
             _fieldsDictionary = FieldsLookup.ToDictionary(g => g.Key, g => g.First());
+            _fieldsIndexLookup = _fieldsList
+                .Select((field, index) => (field.Name, index))
+                .ToLookup(item => item.Name, item => item.index, StringComparer.CurrentCulture);
 
             Metadata = metadata;
         }
@@ -80,7 +78,10 @@ namespace Apache.Arrow
 
         public int GetFieldIndex(string name, IEqualityComparer<string> comparer = default)
         {
-            comparer ??= StringComparer.CurrentCulture;
+            if (comparer == null || ReferenceEquals(comparer, StringComparer.CurrentCulture))
+            {
+                return _fieldsIndexLookup[name].DefaultIfEmpty(-1).First();
+            }
 
             for (int i = 0; i < _fieldsList.Count; i++)
             {
diff --git a/test/Apache.Arrow.Tests/SchemaBuilderTests.cs b/test/Apache.Arrow.Tests/SchemaBuilderTests.cs
index 2691e20..e8cb1fb 100644
--- a/test/Apache.Arrow.Tests/SchemaBuilderTests.cs
+++ b/test/Apache.Arrow.Tests/SchemaBuilderTests.cs
@@ -117,6 +117,17 @@ namespace Apache.Arrow.Tests
                 Assert.Equal(2, schema.GetFieldIndex("f1"));
             }
 
+            [Fact]
+            public void GetFieldIndexWithComparerReturnsMinusOneWhenFieldDoesNotExist()
+            {
+                var schema = new Schema.Builder()
+                    .Field(f => f.Name("f0").DataType(Int32Type.Default))
+                    .Build();
+
+                Assert.Equal(-1, schema.GetFieldIndex("F0", StringComparer.Ordinal));
+                Assert.Equal(-1, schema.GetFieldIndex("f1", StringComparer.OrdinalIgnoreCase));
+            }
+
             [Fact]
             public void GetFieldByName()
             {
diff --git a/test/Apache.Arrow.Tests/SchemaTests.cs b/test/Apache.Arrow.Tests/SchemaTests.cs
new file mode 100644
index 0000000..f18cc7c
--- /dev/null
+++ b/test/Apache.Arrow.Tests/SchemaTests.cs
@@ -0,0 +1,65 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using System;
+using System.Collections.Generic;
+using Apache.Arrow.Types;
+using Xunit;
+
+namespace Apache.Arrow.Tests;
+
+public class SchemaTests
+{
+    [Fact]
+    public void ThrowsWhenFieldsAreNull()
+    {
+        Assert.Throws<ArgumentNullException>(() => new Schema(null, null));
+    }
+
+    [Theory]
+    [MemberData(nameof(StringComparers))]
+    public void CanRetrieveFieldIndexByName(StringComparer comparer)
+    {
+        var field0 = new Field("f0", Int32Type.Default, true);
+        var field1 = new Field("f1", Int64Type.Default, true);
+        var schema = new Schema(new[] { field0, field1 }, null);
+
+        Assert.Equal(0, schema.GetFieldIndex("f0", comparer));
+        Assert.Equal(1, schema.GetFieldIndex("f1", comparer));
+        Assert.Equal(-1, schema.GetFieldIndex("nonexistent", comparer));
+    }
+
+    [Theory]
+    [MemberData(nameof(StringComparers))]
+    public void CanRetrieveFieldIndexByNonUniqueName(StringComparer comparer)
+    {
+        var field0 = new Field("f0", Int32Type.Default, true);
+        var field1 = new Field("f1", Int64Type.Default, true);
+
+        var schema = new Schema(new[] { field0, field1, field0, field1 }, null);
+
+        Assert.Equal(0, schema.GetFieldIndex("f0", comparer));
+        Assert.Equal(1, schema.GetFieldIndex("f1", comparer));
+        Assert.Equal(-1, schema.GetFieldIndex("nonexistent", comparer));
+    }
+
+    public static IEnumerable<object[]> StringComparers()
+    {
+        yield return new object[] { null };
+        yield return new object[] { StringComparer.Ordinal };
+        yield return new object[] { StringComparer.OrdinalIgnoreCase };
+        yield return new object[] { StringComparer.CurrentCulture };
+    }
+}

Reply via email to