This is an automated email from the ASF dual-hosted git repository.
curth pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new ded3295cb6 GH-37861: [C#] Fix StringArray.GetString returning null
instead of empty (#37862)
ded3295cb6 is described below
commit ded3295cb6a29fe571e8cd9371746f59a05ba0e7
Author: Paul Spangler <[email protected]>
AuthorDate: Mon Oct 16 11:43:15 2023 -0500
GH-37861: [C#] Fix StringArray.GetString returning null instead of empty
(#37862)
### Rationale for this change
Fixes #37861.
### What changes are included in this PR?
Add `BinaryArray.GetBytes(int, out bool)` overload that enables
`StringArray.GetString` to reliably determine if the value is null or
empty without needing an additional call to `IsNull`.
### Are these changes tested?
Added a test for `StringArray.GetString` (which didn't appear to have
any existing tests). Two of the cases fail without this change. I also
updated the tests that call `BinaryArray.GetBytes` to use the new
overload instead of a separate call to `IsNull` as extra validation.
### Are there any user-facing changes?
New public overload to an existing API.
* Closes: #37861
---
csharp/src/Apache.Arrow/Arrays/BinaryArray.cs | 20 ++++++--
csharp/src/Apache.Arrow/Arrays/StringArray.cs | 6 +--
.../Apache.Arrow.Tests/BinaryArrayBuilderTests.cs | 3 +-
csharp/test/Apache.Arrow.Tests/StringArrayTests.cs | 54 ++++++++++++++++++++++
4 files changed, 75 insertions(+), 8 deletions(-)
diff --git a/csharp/src/Apache.Arrow/Arrays/BinaryArray.cs
b/csharp/src/Apache.Arrow/Arrays/BinaryArray.cs
index 20fe0342cc..a7ddb14af6 100644
--- a/csharp/src/Apache.Arrow/Arrays/BinaryArray.cs
+++ b/csharp/src/Apache.Arrow/Arrays/BinaryArray.cs
@@ -331,20 +331,33 @@ namespace Apache.Arrow
/// <remarks>
/// Note that this method cannot reliably identify null values, which
are indistinguishable from empty byte
/// collection values when seen in the context of this method's return
type of <see cref="ReadOnlySpan{Byte}"/>.
- /// Use the <see cref="Array.IsNull"/> method instead to reliably
determine null values.
+ /// Use the <see cref="Array.IsNull"/> method or the <see
cref="GetBytes(int, out bool)"/> overload instead
+ /// to reliably determine null values.
/// </remarks>
/// <param name="index">Index at which to get bytes.</param>
/// <returns>Returns a <see cref="ReadOnlySpan{Byte}"/>
object.</returns>
/// <exception cref="ArgumentOutOfRangeException">If the index is
negative or beyond the length of the array.
/// </exception>
- public ReadOnlySpan<byte> GetBytes(int index)
+ public ReadOnlySpan<byte> GetBytes(int index) => GetBytes(index, out
_);
+
+ /// <summary>
+ /// Get the collection of bytes, as a read-only span, at a given index
in the array.
+ /// </summary>
+ /// <param name="index">Index at which to get bytes.</param>
+ /// <param name="isNull">Set to <see langword="true"/> if the value at
the given index is null.</param>
+ /// <returns>Returns a <see cref="ReadOnlySpan{Byte}"/>
object.</returns>
+ /// <exception cref="ArgumentOutOfRangeException">If the index is
negative or beyond the length of the array.
+ /// </exception>
+ public ReadOnlySpan<byte> GetBytes(int index, out bool isNull)
{
if (index < 0 || index >= Length)
{
throw new ArgumentOutOfRangeException(nameof(index));
}
- if (IsNull(index))
+ isNull = IsNull(index);
+
+ if (isNull)
{
// Note that `return null;` is valid syntax, but would be
misleading as `null` in the context of a span
// is actually returned as an empty span.
@@ -353,6 +366,5 @@ namespace Apache.Arrow
return ValueBuffer.Span.Slice(ValueOffsets[index],
GetValueLength(index));
}
-
}
}
diff --git a/csharp/src/Apache.Arrow/Arrays/StringArray.cs
b/csharp/src/Apache.Arrow/Arrays/StringArray.cs
index f008f56fa8..42104b2717 100644
--- a/csharp/src/Apache.Arrow/Arrays/StringArray.cs
+++ b/csharp/src/Apache.Arrow/Arrays/StringArray.cs
@@ -72,11 +72,11 @@ namespace Apache.Arrow
public string GetString(int index, Encoding encoding = default)
{
- encoding = encoding ?? DefaultEncoding;
+ encoding ??= DefaultEncoding;
- ReadOnlySpan<byte> bytes = GetBytes(index);
+ ReadOnlySpan<byte> bytes = GetBytes(index, out bool isNull);
- if (bytes == default)
+ if (isNull)
{
return null;
}
diff --git a/csharp/test/Apache.Arrow.Tests/BinaryArrayBuilderTests.cs
b/csharp/test/Apache.Arrow.Tests/BinaryArrayBuilderTests.cs
index 7f45ce8578..4c2b050d0c 100644
--- a/csharp/test/Apache.Arrow.Tests/BinaryArrayBuilderTests.cs
+++ b/csharp/test/Apache.Arrow.Tests/BinaryArrayBuilderTests.cs
@@ -481,7 +481,8 @@ namespace Apache.Arrow.Tests
for (int i = 0; i < array.Length; i++)
{
var expectedArray = expectedContentsArr[i];
- var actualArray = array.IsNull(i) ? null :
array.GetBytes(i).ToArray();
+ var actualSpan = array.GetBytes(i, out bool isNull);
+ var actualArray = isNull ? null : actualSpan.ToArray();
Assert.Equal(expectedArray, actualArray);
}
}
diff --git a/csharp/test/Apache.Arrow.Tests/StringArrayTests.cs
b/csharp/test/Apache.Arrow.Tests/StringArrayTests.cs
new file mode 100644
index 0000000000..0fd3d3d105
--- /dev/null
+++ b/csharp/test/Apache.Arrow.Tests/StringArrayTests.cs
@@ -0,0 +1,54 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using Xunit;
+
+namespace Apache.Arrow.Tests
+{
+ public class StringArrayTests
+ {
+ public class GetString
+ {
+ [Theory]
+ [InlineData(null, null)]
+ [InlineData(null, "")]
+ [InlineData(null, "value")]
+ [InlineData("", null)]
+ [InlineData("", "")]
+ [InlineData("", "value")]
+ [InlineData("value", null)]
+ [InlineData("value", "")]
+ [InlineData("value", "value")]
+ public void ReturnsAppendedValue(string firstValue, string
secondValue)
+ {
+ // Arrange
+ // Create an array with two elements. The second element being
null,
+ // empty, or non-empty may influence the underlying BinaryArray
+ // storage such that retrieving an empty first element could
result
+ // in an empty span or a 0-length span backed by storage.
+ var array = new StringArray.Builder()
+ .Append(firstValue)
+ .Append(secondValue)
+ .Build();
+
+ // Act
+ var retrievedValue = array.GetString(0);
+
+ // Assert
+ Assert.Equal(firstValue, retrievedValue);
+ }
+ }
+ }
+}