This is an automated email from the ASF dual-hosted git repository.
curth pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 48a9639bb0 GH-41140: [C#] Account for offset and length in union
arrays (#41165)
48a9639bb0 is described below
commit 48a9639bb0bae600adf846fbdcba77aaca151fc0
Author: Adam Reeve <[email protected]>
AuthorDate: Sat Apr 13 01:04:36 2024 +1200
GH-41140: [C#] Account for offset and length in union arrays (#41165)
### Rationale for this change
See #41140. This makes a sliced union array behave as expected without
having to manually account for the array offset unless accessing the underlying
buffers.
### What changes are included in this PR?
Accounts for the offset and length when getting type ids, value offsets and
field arrays for sparse and dense union arrays.
### Are these changes tested?
Yes, I've updated the union array tests to cover this.
### Are there any user-facing changes?
Yes, this is a user facing bug fix.
* GitHub Issue: #41140
Authored-by: Adam Reeve <[email protected]>
Signed-off-by: Curt Hagenlocher <[email protected]>
---
csharp/src/Apache.Arrow/Arrays/DenseUnionArray.cs | 3 +-
csharp/src/Apache.Arrow/Arrays/SparseUnionArray.cs | 1 -
csharp/src/Apache.Arrow/Arrays/UnionArray.cs | 13 +++-
csharp/test/Apache.Arrow.Tests/UnionArrayTests.cs | 87 ++++++++++++++++++----
4 files changed, 85 insertions(+), 19 deletions(-)
diff --git a/csharp/src/Apache.Arrow/Arrays/DenseUnionArray.cs
b/csharp/src/Apache.Arrow/Arrays/DenseUnionArray.cs
index 459a30e221..79880c894b 100644
--- a/csharp/src/Apache.Arrow/Arrays/DenseUnionArray.cs
+++ b/csharp/src/Apache.Arrow/Arrays/DenseUnionArray.cs
@@ -24,7 +24,7 @@ namespace Apache.Arrow
{
public ArrowBuffer ValueOffsetBuffer => Data.Buffers[1];
- public ReadOnlySpan<int> ValueOffsets =>
ValueOffsetBuffer.Span.CastTo<int>();
+ public ReadOnlySpan<int> ValueOffsets =>
ValueOffsetBuffer.Span.CastTo<int>().Slice(Offset, Length);
public DenseUnionArray(
IArrowType dataType,
@@ -38,7 +38,6 @@ namespace Apache.Arrow
dataType, length, nullCount, offset, new[] { typeIds,
valuesOffsetBuffer },
children.Select(child => child.Data)))
{
- _fields = children.ToArray();
ValidateMode(UnionMode.Dense, Type.Mode);
}
diff --git a/csharp/src/Apache.Arrow/Arrays/SparseUnionArray.cs
b/csharp/src/Apache.Arrow/Arrays/SparseUnionArray.cs
index ef55786f01..5b29489ebb 100644
--- a/csharp/src/Apache.Arrow/Arrays/SparseUnionArray.cs
+++ b/csharp/src/Apache.Arrow/Arrays/SparseUnionArray.cs
@@ -32,7 +32,6 @@ namespace Apache.Arrow
dataType, length, nullCount, offset, new[] { typeIds },
children.Select(child => child.Data)))
{
- _fields = children.ToArray();
ValidateMode(UnionMode.Sparse, Type.Mode);
}
diff --git a/csharp/src/Apache.Arrow/Arrays/UnionArray.cs
b/csharp/src/Apache.Arrow/Arrays/UnionArray.cs
index f96f527135..c1deb9b651 100644
--- a/csharp/src/Apache.Arrow/Arrays/UnionArray.cs
+++ b/csharp/src/Apache.Arrow/Arrays/UnionArray.cs
@@ -25,7 +25,7 @@ namespace Apache.Arrow
protected IReadOnlyList<IArrowArray> _fields;
public IReadOnlyList<IArrowArray> Fields =>
- LazyInitializer.EnsureInitialized(ref _fields, () =>
InitializeFields());
+ LazyInitializer.EnsureInitialized(ref _fields, InitializeFields);
public ArrayData Data { get; }
@@ -35,7 +35,7 @@ namespace Apache.Arrow
public ArrowBuffer TypeBuffer => Data.Buffers[0];
- public ReadOnlySpan<byte> TypeIds => TypeBuffer.Span;
+ public ReadOnlySpan<byte> TypeIds => TypeBuffer.Span.Slice(Offset,
Length);
public int Length => Data.Length;
@@ -106,7 +106,14 @@ namespace Apache.Arrow
IArrowArray[] result = new IArrowArray[Data.Children.Length];
for (int i = 0; i < Data.Children.Length; i++)
{
- result[i] = ArrowArrayFactory.BuildArray(Data.Children[i]);
+ var childData = Data.Children[i];
+ if (Mode == UnionMode.Sparse && (Data.Offset != 0 ||
childData.Length != Data.Length))
+ {
+ // We only slice the child data for sparse mode,
+ // so that the sliced value offsets remain valid in dense
mode
+ childData = childData.Slice(Data.Offset, Data.Length);
+ }
+ result[i] = ArrowArrayFactory.BuildArray(childData);
}
return result;
}
diff --git a/csharp/test/Apache.Arrow.Tests/UnionArrayTests.cs
b/csharp/test/Apache.Arrow.Tests/UnionArrayTests.cs
index 45fed722a7..712a87a252 100644
--- a/csharp/test/Apache.Arrow.Tests/UnionArrayTests.cs
+++ b/csharp/test/Apache.Arrow.Tests/UnionArrayTests.cs
@@ -13,6 +13,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+using System;
using System.Linq;
using Apache.Arrow.Types;
using Xunit;
@@ -24,7 +25,7 @@ public class UnionArrayTests
[Theory]
[InlineData(UnionMode.Sparse)]
[InlineData(UnionMode.Dense)]
- public void UnionArray_IsNull(UnionMode mode)
+ public void UnionArrayIsNull(UnionMode mode)
{
var (array, expectedNull) = BuildUnionArray(mode, 100);
@@ -38,7 +39,7 @@ public class UnionArrayTests
[Theory]
[InlineData(UnionMode.Sparse)]
[InlineData(UnionMode.Dense)]
- public void UnionArray_Slice(UnionMode mode)
+ public void UnionArraySlice(UnionMode mode)
{
var (array, expectedNull) = BuildUnionArray(mode, 10);
@@ -46,32 +47,92 @@ public class UnionArrayTests
{
for (var length = 0; length < array.Length - offset; ++length)
{
- var slicedArray = ArrowArrayFactory.Slice(array, offset,
length);
+ var slicedArray = (UnionArray)ArrowArrayFactory.Slice(array,
offset, length);
var nullCount = 0;
for (var i = 0; i < slicedArray.Length; ++i)
{
- // TODO: Shouldn't need to add offset in IsNull/IsValid
calls,
- // see https://github.com/apache/arrow/issues/41140
- Assert.Equal(expectedNull[offset + i],
slicedArray.IsNull(offset + i));
- Assert.Equal(!expectedNull[offset + i],
slicedArray.IsValid(offset + i));
+ Assert.Equal(expectedNull[offset + i],
slicedArray.IsNull(i));
+ Assert.Equal(!expectedNull[offset + i],
slicedArray.IsValid(i));
nullCount += expectedNull[offset + i] ? 1 : 0;
+
+ CompareValue(array, offset + i, slicedArray, i);
}
- Assert.True(nullCount == slicedArray.NullCount, $"offset =
{offset}, length = {length}");
Assert.Equal(nullCount, slicedArray.NullCount);
}
}
}
- private static (UnionArray array, bool[] isNull) BuildUnionArray(UnionMode
mode, int length)
+ [Theory]
+ [InlineData(UnionMode.Sparse)]
+ [InlineData(UnionMode.Dense)]
+ public void UnionArrayConstructedWithOffset(UnionMode mode)
+ {
+ const int length = 10;
+ var (array, expectedNull) = BuildUnionArray(mode, length);
+
+ for (var offset = 0; offset < array.Length; ++offset)
+ {
+ var (slicedArray, _) = BuildUnionArray(mode, length, offset);
+
+ var nullCount = 0;
+ for (var i = 0; i < slicedArray.Length; ++i)
+ {
+ Assert.Equal(expectedNull[offset + i], slicedArray.IsNull(i));
+ Assert.Equal(!expectedNull[offset + i],
slicedArray.IsValid(i));
+ nullCount += expectedNull[offset + i] ? 1 : 0;
+
+ CompareValue(array, offset + i, slicedArray, i);
+ }
+
+ Assert.Equal(nullCount, slicedArray.NullCount);
+ }
+ }
+
+ private static void CompareValue(UnionArray originalArray, int
originalIndex, UnionArray slicedArray, int sliceIndex)
+ {
+ var typeId = originalArray.TypeIds[originalIndex];
+ var sliceTypeId = slicedArray.TypeIds[sliceIndex];
+ Assert.Equal(typeId, sliceTypeId);
+
+ switch (typeId)
+ {
+ case 0:
+ CompareFieldValue<int, Int32Array>(typeId, originalArray,
originalIndex, slicedArray, sliceIndex);
+ break;
+ case 1:
+ CompareFieldValue<float, FloatArray>(typeId, originalArray,
originalIndex, slicedArray, sliceIndex);
+ break;
+ default:
+ throw new Exception($"Unexpected type id {typeId}");
+ }
+ }
+
+ private static void CompareFieldValue<T, TArray>(byte typeId, UnionArray
originalArray, int originalIndex, UnionArray slicedArray, int sliceIndex)
+ where T: struct
+ where TArray : PrimitiveArray<T>
+ {
+ if (originalArray is DenseUnionArray denseOriginalArray)
+ {
+ Assert.IsType<DenseUnionArray>(slicedArray);
+
+ originalIndex = denseOriginalArray.ValueOffsets[originalIndex];
+ sliceIndex =
((DenseUnionArray)slicedArray).ValueOffsets[sliceIndex];
+ }
+ var originalValue =
((TArray)originalArray.Fields[typeId]).GetValue(originalIndex);
+ var sliceValue =
((TArray)slicedArray.Fields[typeId]).GetValue(sliceIndex);
+ Assert.Equal(originalValue, sliceValue);
+ }
+
+ private static (UnionArray array, bool[] isNull) BuildUnionArray(UnionMode
mode, int length, int offset=0)
{
var fields = new Field[]
{
new Field("field0", new Int32Type(), true),
new Field("field1", new FloatType(), true),
};
- var typeIds = fields.Select(f => (int) f.DataType.TypeId).ToArray();
+ var typeIds = new[] { 0, 1 };
var type = new UnionType(fields, typeIds, mode);
var nullCount = 0;
@@ -85,7 +146,7 @@ public class UnionArrayTests
{
var isNull = i % 3 == 0;
expectedNull[i] = isNull;
- nullCount += isNull ? 1 : 0;
+ nullCount += (isNull && i >= offset) ? 1 : 0;
if (i % 2 == 0)
{
@@ -140,8 +201,8 @@ public class UnionArrayTests
};
UnionArray array = mode == UnionMode.Dense
- ? new DenseUnionArray(type, length, children, typeIdsBuffer,
valuesOffsetBuffer, nullCount)
- : new SparseUnionArray(type, length, children, typeIdsBuffer,
nullCount);
+ ? new DenseUnionArray(type, length - offset, children,
typeIdsBuffer, valuesOffsetBuffer, nullCount, offset)
+ : new SparseUnionArray(type, length - offset, children,
typeIdsBuffer, nullCount, offset);
return (array, expectedNull);
}