This is an automated email from the ASF dual-hosted git repository.

curth pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 48a9639bb0 GH-41140: [C#] Account for offset and length in union 
arrays (#41165)
48a9639bb0 is described below

commit 48a9639bb0bae600adf846fbdcba77aaca151fc0
Author: Adam Reeve <[email protected]>
AuthorDate: Sat Apr 13 01:04:36 2024 +1200

    GH-41140: [C#] Account for offset and length in union arrays (#41165)
    
    ### Rationale for this change
    
    See #41140. This makes a sliced union array behave as expected without 
having to manually account for the array offset unless accessing the underlying 
buffers.
    
    ### What changes are included in this PR?
    
    Accounts for the offset and length when getting type ids, value offsets and 
field arrays for sparse and dense union arrays.
    
    ### Are these changes tested?
    
    Yes, I've updated the union array tests to cover this.
    
    ### Are there any user-facing changes?
    
    Yes, this is a user facing bug fix.
    * GitHub Issue: #41140
    
    Authored-by: Adam Reeve <[email protected]>
    Signed-off-by: Curt Hagenlocher <[email protected]>
---
 csharp/src/Apache.Arrow/Arrays/DenseUnionArray.cs  |  3 +-
 csharp/src/Apache.Arrow/Arrays/SparseUnionArray.cs |  1 -
 csharp/src/Apache.Arrow/Arrays/UnionArray.cs       | 13 +++-
 csharp/test/Apache.Arrow.Tests/UnionArrayTests.cs  | 87 ++++++++++++++++++----
 4 files changed, 85 insertions(+), 19 deletions(-)

diff --git a/csharp/src/Apache.Arrow/Arrays/DenseUnionArray.cs 
b/csharp/src/Apache.Arrow/Arrays/DenseUnionArray.cs
index 459a30e221..79880c894b 100644
--- a/csharp/src/Apache.Arrow/Arrays/DenseUnionArray.cs
+++ b/csharp/src/Apache.Arrow/Arrays/DenseUnionArray.cs
@@ -24,7 +24,7 @@ namespace Apache.Arrow
     {
         public ArrowBuffer ValueOffsetBuffer => Data.Buffers[1];
 
-        public ReadOnlySpan<int> ValueOffsets => 
ValueOffsetBuffer.Span.CastTo<int>();
+        public ReadOnlySpan<int> ValueOffsets => 
ValueOffsetBuffer.Span.CastTo<int>().Slice(Offset, Length);
 
         public DenseUnionArray(
             IArrowType dataType,
@@ -38,7 +38,6 @@ namespace Apache.Arrow
                 dataType, length, nullCount, offset, new[] { typeIds, 
valuesOffsetBuffer },
                 children.Select(child => child.Data)))
         {
-            _fields = children.ToArray();
             ValidateMode(UnionMode.Dense, Type.Mode);
         }
 
diff --git a/csharp/src/Apache.Arrow/Arrays/SparseUnionArray.cs 
b/csharp/src/Apache.Arrow/Arrays/SparseUnionArray.cs
index ef55786f01..5b29489ebb 100644
--- a/csharp/src/Apache.Arrow/Arrays/SparseUnionArray.cs
+++ b/csharp/src/Apache.Arrow/Arrays/SparseUnionArray.cs
@@ -32,7 +32,6 @@ namespace Apache.Arrow
                 dataType, length, nullCount, offset, new[] { typeIds },
                 children.Select(child => child.Data)))
         {
-            _fields = children.ToArray();
             ValidateMode(UnionMode.Sparse, Type.Mode);
         }
 
diff --git a/csharp/src/Apache.Arrow/Arrays/UnionArray.cs 
b/csharp/src/Apache.Arrow/Arrays/UnionArray.cs
index f96f527135..c1deb9b651 100644
--- a/csharp/src/Apache.Arrow/Arrays/UnionArray.cs
+++ b/csharp/src/Apache.Arrow/Arrays/UnionArray.cs
@@ -25,7 +25,7 @@ namespace Apache.Arrow
         protected IReadOnlyList<IArrowArray> _fields;
 
         public IReadOnlyList<IArrowArray> Fields =>
-            LazyInitializer.EnsureInitialized(ref _fields, () => 
InitializeFields());
+            LazyInitializer.EnsureInitialized(ref _fields, InitializeFields);
 
         public ArrayData Data { get; }
 
@@ -35,7 +35,7 @@ namespace Apache.Arrow
 
         public ArrowBuffer TypeBuffer => Data.Buffers[0];
 
-        public ReadOnlySpan<byte> TypeIds => TypeBuffer.Span;
+        public ReadOnlySpan<byte> TypeIds => TypeBuffer.Span.Slice(Offset, 
Length);
 
         public int Length => Data.Length;
 
@@ -106,7 +106,14 @@ namespace Apache.Arrow
             IArrowArray[] result = new IArrowArray[Data.Children.Length];
             for (int i = 0; i < Data.Children.Length; i++)
             {
-                result[i] = ArrowArrayFactory.BuildArray(Data.Children[i]);
+                var childData = Data.Children[i];
+                if (Mode == UnionMode.Sparse && (Data.Offset != 0 || 
childData.Length != Data.Length))
+                {
+                    // We only slice the child data for sparse mode,
+                    // so that the sliced value offsets remain valid in dense 
mode
+                    childData = childData.Slice(Data.Offset, Data.Length);
+                }
+                result[i] = ArrowArrayFactory.BuildArray(childData);
             }
             return result;
         }
diff --git a/csharp/test/Apache.Arrow.Tests/UnionArrayTests.cs 
b/csharp/test/Apache.Arrow.Tests/UnionArrayTests.cs
index 45fed722a7..712a87a252 100644
--- a/csharp/test/Apache.Arrow.Tests/UnionArrayTests.cs
+++ b/csharp/test/Apache.Arrow.Tests/UnionArrayTests.cs
@@ -13,6 +13,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+using System;
 using System.Linq;
 using Apache.Arrow.Types;
 using Xunit;
@@ -24,7 +25,7 @@ public class UnionArrayTests
     [Theory]
     [InlineData(UnionMode.Sparse)]
     [InlineData(UnionMode.Dense)]
-    public void UnionArray_IsNull(UnionMode mode)
+    public void UnionArrayIsNull(UnionMode mode)
     {
         var (array, expectedNull) = BuildUnionArray(mode, 100);
 
@@ -38,7 +39,7 @@ public class UnionArrayTests
     [Theory]
     [InlineData(UnionMode.Sparse)]
     [InlineData(UnionMode.Dense)]
-    public void UnionArray_Slice(UnionMode mode)
+    public void UnionArraySlice(UnionMode mode)
     {
         var (array, expectedNull) = BuildUnionArray(mode, 10);
 
@@ -46,32 +47,92 @@ public class UnionArrayTests
         {
             for (var length = 0; length < array.Length - offset; ++length)
             {
-                var slicedArray = ArrowArrayFactory.Slice(array, offset, 
length);
+                var slicedArray = (UnionArray)ArrowArrayFactory.Slice(array, 
offset, length);
 
                 var nullCount = 0;
                 for (var i = 0; i < slicedArray.Length; ++i)
                 {
-                    // TODO: Shouldn't need to add offset in IsNull/IsValid 
calls,
-                    // see https://github.com/apache/arrow/issues/41140
-                    Assert.Equal(expectedNull[offset + i], 
slicedArray.IsNull(offset + i));
-                    Assert.Equal(!expectedNull[offset + i], 
slicedArray.IsValid(offset + i));
+                    Assert.Equal(expectedNull[offset + i], 
slicedArray.IsNull(i));
+                    Assert.Equal(!expectedNull[offset + i], 
slicedArray.IsValid(i));
                     nullCount += expectedNull[offset + i] ? 1 : 0;
+
+                    CompareValue(array, offset + i, slicedArray, i);
                 }
 
-                Assert.True(nullCount == slicedArray.NullCount, $"offset = 
{offset}, length = {length}");
                 Assert.Equal(nullCount, slicedArray.NullCount);
             }
         }
     }
 
-    private static (UnionArray array, bool[] isNull) BuildUnionArray(UnionMode 
mode, int length)
+    [Theory]
+    [InlineData(UnionMode.Sparse)]
+    [InlineData(UnionMode.Dense)]
+    public void UnionArrayConstructedWithOffset(UnionMode mode)
+    {
+        const int length = 10;
+        var (array, expectedNull) = BuildUnionArray(mode, length);
+
+        for (var offset = 0; offset < array.Length; ++offset)
+        {
+            var (slicedArray, _) = BuildUnionArray(mode, length, offset);
+
+            var nullCount = 0;
+            for (var i = 0; i < slicedArray.Length; ++i)
+            {
+                Assert.Equal(expectedNull[offset + i], slicedArray.IsNull(i));
+                Assert.Equal(!expectedNull[offset + i], 
slicedArray.IsValid(i));
+                nullCount += expectedNull[offset + i] ? 1 : 0;
+
+                CompareValue(array, offset + i, slicedArray, i);
+            }
+
+            Assert.Equal(nullCount, slicedArray.NullCount);
+        }
+    }
+
+    private static void CompareValue(UnionArray originalArray, int 
originalIndex, UnionArray slicedArray, int sliceIndex)
+    {
+        var typeId = originalArray.TypeIds[originalIndex];
+        var sliceTypeId = slicedArray.TypeIds[sliceIndex];
+        Assert.Equal(typeId, sliceTypeId);
+
+        switch (typeId)
+        {
+            case 0:
+                CompareFieldValue<int, Int32Array>(typeId, originalArray, 
originalIndex, slicedArray, sliceIndex);
+                break;
+            case 1:
+                CompareFieldValue<float, FloatArray>(typeId, originalArray, 
originalIndex, slicedArray, sliceIndex);
+                break;
+            default:
+                throw new Exception($"Unexpected type id {typeId}");
+        }
+    }
+
+    private static void CompareFieldValue<T, TArray>(byte typeId, UnionArray 
originalArray, int originalIndex, UnionArray slicedArray, int sliceIndex)
+        where T: struct
+        where TArray : PrimitiveArray<T>
+    {
+        if (originalArray is DenseUnionArray denseOriginalArray)
+        {
+            Assert.IsType<DenseUnionArray>(slicedArray);
+
+            originalIndex = denseOriginalArray.ValueOffsets[originalIndex];
+            sliceIndex = 
((DenseUnionArray)slicedArray).ValueOffsets[sliceIndex];
+        }
+        var originalValue = 
((TArray)originalArray.Fields[typeId]).GetValue(originalIndex);
+        var sliceValue = 
((TArray)slicedArray.Fields[typeId]).GetValue(sliceIndex);
+        Assert.Equal(originalValue, sliceValue);
+    }
+
+    private static (UnionArray array, bool[] isNull) BuildUnionArray(UnionMode 
mode, int length, int offset=0)
     {
         var fields = new Field[]
         {
             new Field("field0", new Int32Type(), true),
             new Field("field1", new FloatType(), true),
         };
-        var typeIds = fields.Select(f => (int) f.DataType.TypeId).ToArray();
+        var typeIds = new[] { 0, 1 };
         var type = new UnionType(fields, typeIds, mode);
 
         var nullCount = 0;
@@ -85,7 +146,7 @@ public class UnionArrayTests
         {
             var isNull = i % 3 == 0;
             expectedNull[i] = isNull;
-            nullCount += isNull ? 1 : 0;
+            nullCount += (isNull && i >= offset) ? 1 : 0;
 
             if (i % 2 == 0)
             {
@@ -140,8 +201,8 @@ public class UnionArrayTests
         };
 
         UnionArray array = mode == UnionMode.Dense
-            ? new DenseUnionArray(type, length, children, typeIdsBuffer, 
valuesOffsetBuffer, nullCount)
-            : new SparseUnionArray(type, length, children, typeIdsBuffer, 
nullCount);
+            ? new DenseUnionArray(type, length - offset, children, 
typeIdsBuffer, valuesOffsetBuffer, nullCount, offset)
+            : new SparseUnionArray(type, length - offset, children, 
typeIdsBuffer, nullCount, offset);
 
         return (array, expectedNull);
     }

Reply via email to