This is an automated email from the ASF dual-hosted git repository. raulcd pushed a commit to branch maint-16.x.x in repository https://gitbox.apache.org/repos/asf/arrow.git
commit 814bb5e991f6e6d3f2cce968c543c058df07bcd9 Author: Adam Reeve <[email protected]> AuthorDate: Thu Apr 11 14:38:49 2024 +1200 GH-41137: [C#] Fix DenseUnionArray IsNull/Valid (#41138) ### Rationale for this change Fixes incorrect logic for IsNull and IsValid on DenseUnionArrays ### What changes are included in this PR? Adds a new abstract `FieldIsValid` method to `UnionArray` to customize behaviour for sparse and dense union arrays. ### Are these changes tested? Yes, I've added unit tests. ### Are there any user-facing changes? Yes, this is a user facing bug fix. * GitHub Issue: #41137 Authored-by: Adam Reeve <[email protected]> Signed-off-by: Curt Hagenlocher <[email protected]> --- csharp/src/Apache.Arrow/Arrays/DenseUnionArray.cs | 5 + csharp/src/Apache.Arrow/Arrays/SparseUnionArray.cs | 5 + csharp/src/Apache.Arrow/Arrays/UnionArray.cs | 4 +- csharp/test/Apache.Arrow.Tests/UnionArrayTests.cs | 113 +++++++++++++++++++++ 4 files changed, 126 insertions(+), 1 deletion(-) diff --git a/csharp/src/Apache.Arrow/Arrays/DenseUnionArray.cs b/csharp/src/Apache.Arrow/Arrays/DenseUnionArray.cs index 1aacbe11f0..b6b61c560e 100644 --- a/csharp/src/Apache.Arrow/Arrays/DenseUnionArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/DenseUnionArray.cs @@ -48,5 +48,10 @@ namespace Apache.Arrow ValidateMode(UnionMode.Dense, Type.Mode); data.EnsureBufferCount(2); } + + protected override bool FieldIsValid(IArrowArray fieldArray, int index) + { + return fieldArray.IsValid(ValueOffsets[index]); + } } } diff --git a/csharp/src/Apache.Arrow/Arrays/SparseUnionArray.cs b/csharp/src/Apache.Arrow/Arrays/SparseUnionArray.cs index b79c44c979..07d36e25cf 100644 --- a/csharp/src/Apache.Arrow/Arrays/SparseUnionArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/SparseUnionArray.cs @@ -42,5 +42,10 @@ namespace Apache.Arrow ValidateMode(UnionMode.Sparse, Type.Mode); data.EnsureBufferCount(1); } + + protected override bool FieldIsValid(IArrowArray fieldArray, int index) + { + return fieldArray.IsValid(index); + } } } diff --git a/csharp/src/Apache.Arrow/Arrays/UnionArray.cs b/csharp/src/Apache.Arrow/Arrays/UnionArray.cs index 0a7ae288fd..5fcb276655 100644 --- a/csharp/src/Apache.Arrow/Arrays/UnionArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/UnionArray.cs @@ -43,7 +43,7 @@ namespace Apache.Arrow public int NullCount => Data.NullCount; - public bool IsValid(int index) => NullCount == 0 || Fields[TypeIds[index]].IsValid(index); + public bool IsValid(int index) => NullCount == 0 || FieldIsValid(Fields[TypeIds[index]], index); public bool IsNull(int index) => !IsValid(index); @@ -65,6 +65,8 @@ namespace Apache.Arrow public void Accept(IArrowArrayVisitor visitor) => Array.Accept(this, visitor); + protected abstract bool FieldIsValid(IArrowArray field, int index); + public void Dispose() { Dispose(true); diff --git a/csharp/test/Apache.Arrow.Tests/UnionArrayTests.cs b/csharp/test/Apache.Arrow.Tests/UnionArrayTests.cs new file mode 100644 index 0000000000..1fb5cf2415 --- /dev/null +++ b/csharp/test/Apache.Arrow.Tests/UnionArrayTests.cs @@ -0,0 +1,113 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using System.Linq; +using Apache.Arrow.Types; +using Xunit; + +namespace Apache.Arrow.Tests; + +public class UnionArrayTests +{ + [Theory] + [InlineData(UnionMode.Sparse)] + [InlineData(UnionMode.Dense)] + public void UnionArray_IsNull(UnionMode mode) + { + var fields = new Field[] + { + new Field("field0", new Int32Type(), true), + new Field("field1", new FloatType(), true), + }; + var typeIds = fields.Select(f => (int) f.DataType.TypeId).ToArray(); + var type = new UnionType(fields, typeIds, mode); + + const int length = 100; + var nullCount = 0; + var field0Builder = new Int32Array.Builder(); + var field1Builder = new FloatArray.Builder(); + var typeIdsBuilder = new ArrowBuffer.Builder<byte>(); + var valuesOffsetBuilder = new ArrowBuffer.Builder<int>(); + var expectedNull = new bool[length]; + + for (var i = 0; i < length; ++i) + { + var isNull = i % 5 == 0; + expectedNull[i] = isNull; + nullCount += isNull ? 1 : 0; + + if (i % 2 == 0) + { + typeIdsBuilder.Append(0); + if (mode == UnionMode.Sparse) + { + field1Builder.Append(0.0f); + } + else + { + valuesOffsetBuilder.Append(field0Builder.Length); + } + + if (isNull) + { + field0Builder.AppendNull(); + } + else + { + field0Builder.Append(i); + } + } + else + { + typeIdsBuilder.Append(1); + if (mode == UnionMode.Sparse) + { + field0Builder.Append(0); + } + else + { + valuesOffsetBuilder.Append(field1Builder.Length); + } + + if (isNull) + { + field1Builder.AppendNull(); + } + else + { + field1Builder.Append(i * 0.1f); + } + } + } + + var typeIdsBuffer = typeIdsBuilder.Build(); + var valuesOffsetBuffer = valuesOffsetBuilder.Build(); + var children = new IArrowArray[] + { + field0Builder.Build(), + field1Builder.Build() + }; + + UnionArray array = mode == UnionMode.Dense + ? new DenseUnionArray(type, length, children, typeIdsBuffer, valuesOffsetBuffer, nullCount) + : new SparseUnionArray(type, length, children, typeIdsBuffer, nullCount); + + for (var i = 0; i < length; ++i) + { + Assert.Equal(expectedNull[i], array.IsNull(i)); + Assert.Equal(!expectedNull[i], array.IsValid(i)); + } + } +}
