This is an automated email from the ASF dual-hosted git repository.

curth pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new e0f31aa1d4 GH-41198: [C#] Fix concatenation of union arrays (#41226)
e0f31aa1d4 is described below

commit e0f31aa1d4007ebce01cd4bca369d12c4d083162
Author: Adam Reeve <[email protected]>
AuthorDate: Tue Apr 16 13:50:31 2024 +1200

    GH-41198: [C#] Fix concatenation of union arrays (#41226)
    
    ### Rationale for this change
    
    Fixes concatenation of union arrays.
    
    ### What changes are included in this PR?
    
    * Re-enables union array concatenation tests that were disabled in #41197 
after making union array comparisons more thorough in the `ArrowReaderVerifier`
    * Updates the union array concatenation logic to account for array lengths 
when concatenating the type and offset buffers, and fixes how the base offset 
is calculated.
    * Fixes creating the type buffers for the array concatenation tests.
    
    ### Are these changes tested?
    
    Yes, this uses the existing `ArrowArrayConcatenatorTests` tests.
    
    ### Are there any user-facing changes?
    
    Yes, this is a user-facing bug fix.
    * GitHub Issue: #41198
    
    Authored-by: Adam Reeve <[email protected]>
    Signed-off-by: Curt Hagenlocher <[email protected]>
---
 .../Apache.Arrow/Arrays/ArrayDataConcatenator.cs   | 22 +++++++++++++++-------
 .../ArrowArrayConcatenatorTests.cs                 |  9 ++-------
 2 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/csharp/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs 
b/csharp/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs
index 84658a5fab..347d0d76ba 100644
--- a/csharp/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs
+++ b/csharp/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs
@@ -367,7 +367,7 @@ namespace Apache.Arrow
 
                 foreach (ArrayData arrayData in _arrayDataList)
                 {
-                    builder.Append(arrayData.Buffers[0]);
+                    
builder.Append(arrayData.Buffers[0].Span.Slice(arrayData.Offset, 
arrayData.Length));
                 }
 
                 return builder.Build(_allocator);
@@ -376,18 +376,26 @@ namespace Apache.Arrow
             private ArrowBuffer ConcatenateUnionOffsetBuffer()
             {
                 var builder = new ArrowBuffer.Builder<int>(_totalLength);
-                int baseOffset = 0;
+                var typeCount = _arrayDataList.Count > 0 ? 
_arrayDataList[0].Children.Length : 0;
+                var baseOffsets = new int[typeCount];
 
                 foreach (ArrayData arrayData in _arrayDataList)
                 {
-                    ReadOnlySpan<int> span = 
arrayData.Buffers[1].Span.CastTo<int>();
-                    foreach (int offset in span)
+                    ReadOnlySpan<byte> typeSpan = 
arrayData.Buffers[0].Span.Slice(arrayData.Offset, arrayData.Length);
+                    ReadOnlySpan<int> offsetSpan = 
arrayData.Buffers[1].Span.CastTo<int>().Slice(arrayData.Offset, 
arrayData.Length);
+                    for (int i = 0; i < arrayData.Length; ++i)
                     {
-                        builder.Append(baseOffset + offset);
+                        var typeId = typeSpan[i];
+                        builder.Append(checked(baseOffsets[typeId] + 
offsetSpan[i]));
                     }
 
-                    // The next offset must start from the current last offset.
-                    baseOffset += span[arrayData.Length];
+                    for (int i = 0; i < typeCount; ++i)
+                    {
+                        checked
+                        {
+                            baseOffsets[i] += arrayData.Children[i].Length;
+                        }
+                    }
                 }
 
                 return builder.Build(_allocator);
diff --git a/csharp/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs 
b/csharp/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs
index 700de58adb..a1f6b1b8d8 100644
--- a/csharp/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs
+++ b/csharp/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs
@@ -29,12 +29,6 @@ namespace Apache.Arrow.Tests
         {
             foreach ((List<IArrowArray> testTargetArrayList, IArrowArray 
expectedArray) in GenerateTestData())
             {
-                if (expectedArray is UnionArray)
-                {
-                    // Union array concatenation is incorrect. See 
https://github.com/apache/arrow/issues/41198
-                    continue;
-                }
-
                 IArrowArray actualArray = 
ArrowArrayConcatenator.Concatenate(testTargetArrayList);
                 ArrowReaderVerifier.CompareArrays(expectedArray, actualArray);
             }
@@ -604,10 +598,11 @@ namespace Apache.Arrow.Tests
 
                     for (int j = 0; j < dataList.Count; j++)
                     {
-                        byte index = (byte)Math.Max(j % 3, 1);
+                        byte index = (byte)Math.Min(j % 3, 1);
                         int? intValue = (index == 1) ? dataList[j] : null;
                         string stringValue = (index == 1) ? null : 
dataList[j]?.ToString();
                         typeBuilder.Append(index);
+                        typeResultBuilder.Append(index);
 
                         if (isDense)
                         {

Reply via email to