This is an automated email from the ASF dual-hosted git repository.
curth pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new e0f31aa1d4 GH-41198: [C#] Fix concatenation of union arrays (#41226)
e0f31aa1d4 is described below
commit e0f31aa1d4007ebce01cd4bca369d12c4d083162
Author: Adam Reeve <[email protected]>
AuthorDate: Tue Apr 16 13:50:31 2024 +1200
GH-41198: [C#] Fix concatenation of union arrays (#41226)
### Rationale for this change
Fixes concatenation of union arrays.
### What changes are included in this PR?
* Re-enables union array concatenation tests that were disabled in #41197
after making union array comparisons more thorough in the `ArrowReaderVerifier`
* Updates the union array concatenation logic to account for array lengths
when concatenating the type and offset buffers, and fixes how the base offset
is calculated.
* Fixes creating the type buffers for the array concatenation tests.
### Are these changes tested?
Yes, this uses the existing `ArrowArrayConcatenatorTests` tests.
### Are there any user-facing changes?
Yes, this is a user-facing bug fix.
* GitHub Issue: #41198
Authored-by: Adam Reeve <[email protected]>
Signed-off-by: Curt Hagenlocher <[email protected]>
---
.../Apache.Arrow/Arrays/ArrayDataConcatenator.cs | 22 +++++++++++++++-------
.../ArrowArrayConcatenatorTests.cs | 9 ++-------
2 files changed, 17 insertions(+), 14 deletions(-)
diff --git a/csharp/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs
b/csharp/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs
index 84658a5fab..347d0d76ba 100644
--- a/csharp/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs
+++ b/csharp/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs
@@ -367,7 +367,7 @@ namespace Apache.Arrow
foreach (ArrayData arrayData in _arrayDataList)
{
- builder.Append(arrayData.Buffers[0]);
+
builder.Append(arrayData.Buffers[0].Span.Slice(arrayData.Offset,
arrayData.Length));
}
return builder.Build(_allocator);
@@ -376,18 +376,26 @@ namespace Apache.Arrow
private ArrowBuffer ConcatenateUnionOffsetBuffer()
{
var builder = new ArrowBuffer.Builder<int>(_totalLength);
- int baseOffset = 0;
+ var typeCount = _arrayDataList.Count > 0 ?
_arrayDataList[0].Children.Length : 0;
+ var baseOffsets = new int[typeCount];
foreach (ArrayData arrayData in _arrayDataList)
{
- ReadOnlySpan<int> span =
arrayData.Buffers[1].Span.CastTo<int>();
- foreach (int offset in span)
+ ReadOnlySpan<byte> typeSpan =
arrayData.Buffers[0].Span.Slice(arrayData.Offset, arrayData.Length);
+ ReadOnlySpan<int> offsetSpan =
arrayData.Buffers[1].Span.CastTo<int>().Slice(arrayData.Offset,
arrayData.Length);
+ for (int i = 0; i < arrayData.Length; ++i)
{
- builder.Append(baseOffset + offset);
+ var typeId = typeSpan[i];
+ builder.Append(checked(baseOffsets[typeId] +
offsetSpan[i]));
}
- // The next offset must start from the current last offset.
- baseOffset += span[arrayData.Length];
+ for (int i = 0; i < typeCount; ++i)
+ {
+ checked
+ {
+ baseOffsets[i] += arrayData.Children[i].Length;
+ }
+ }
}
return builder.Build(_allocator);
diff --git a/csharp/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs
b/csharp/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs
index 700de58adb..a1f6b1b8d8 100644
--- a/csharp/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs
+++ b/csharp/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs
@@ -29,12 +29,6 @@ namespace Apache.Arrow.Tests
{
foreach ((List<IArrowArray> testTargetArrayList, IArrowArray
expectedArray) in GenerateTestData())
{
- if (expectedArray is UnionArray)
- {
- // Union array concatenation is incorrect. See
https://github.com/apache/arrow/issues/41198
- continue;
- }
-
IArrowArray actualArray =
ArrowArrayConcatenator.Concatenate(testTargetArrayList);
ArrowReaderVerifier.CompareArrays(expectedArray, actualArray);
}
@@ -604,10 +598,11 @@ namespace Apache.Arrow.Tests
for (int j = 0; j < dataList.Count; j++)
{
- byte index = (byte)Math.Max(j % 3, 1);
+ byte index = (byte)Math.Min(j % 3, 1);
int? intValue = (index == 1) ? dataList[j] : null;
string stringValue = (index == 1) ? null :
dataList[j]?.ToString();
typeBuilder.Append(index);
+ typeResultBuilder.Append(index);
if (isDense)
{