This is an automated email from the ASF dual-hosted git repository.

curth pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new ded3295cb6 GH-37861: [C#] Fix StringArray.GetString returning null 
instead of empty (#37862)
ded3295cb6 is described below

commit ded3295cb6a29fe571e8cd9371746f59a05ba0e7
Author: Paul Spangler <[email protected]>
AuthorDate: Mon Oct 16 11:43:15 2023 -0500

    GH-37861: [C#] Fix StringArray.GetString returning null instead of empty 
(#37862)
    
    ### Rationale for this change
    
    Fixes #37861.
    
    ### What changes are included in this PR?
    
    Add `BinaryArray.GetBytes(int, out bool)` overload that enables
    `StringArray.GetString` to reliably determine if the value is null or
    empty without needing an additional call to `IsNull`.
    
    ### Are these changes tested?
    
    Added a test for `StringArray.GetString` (which didn't appear to have
    any existing tests). Two of the cases fail without this change. I also
    updated the tests that call `BinaryArray.GetBytes` to use the new
    overload instead of a separate call to `IsNull` as extra validation.
    
    ### Are there any user-facing changes?
    
    New public overload to an existing API.
    * Closes: #37861
---
 csharp/src/Apache.Arrow/Arrays/BinaryArray.cs      | 20 ++++++--
 csharp/src/Apache.Arrow/Arrays/StringArray.cs      |  6 +--
 .../Apache.Arrow.Tests/BinaryArrayBuilderTests.cs  |  3 +-
 csharp/test/Apache.Arrow.Tests/StringArrayTests.cs | 54 ++++++++++++++++++++++
 4 files changed, 75 insertions(+), 8 deletions(-)

diff --git a/csharp/src/Apache.Arrow/Arrays/BinaryArray.cs 
b/csharp/src/Apache.Arrow/Arrays/BinaryArray.cs
index 20fe0342cc..a7ddb14af6 100644
--- a/csharp/src/Apache.Arrow/Arrays/BinaryArray.cs
+++ b/csharp/src/Apache.Arrow/Arrays/BinaryArray.cs
@@ -331,20 +331,33 @@ namespace Apache.Arrow
         /// <remarks>
         /// Note that this method cannot reliably identify null values, which 
are indistinguishable from empty byte
         /// collection values when seen in the context of this method's return 
type of <see cref="ReadOnlySpan{Byte}"/>.
-        /// Use the <see cref="Array.IsNull"/> method instead to reliably 
determine null values.
+        /// Use the <see cref="Array.IsNull"/> method or the <see 
cref="GetBytes(int, out bool)"/> overload instead
+        /// to reliably determine null values.
         /// </remarks>
         /// <param name="index">Index at which to get bytes.</param>
         /// <returns>Returns a <see cref="ReadOnlySpan{Byte}"/> 
object.</returns>
         /// <exception cref="ArgumentOutOfRangeException">If the index is 
negative or beyond the length of the array.
         /// </exception>
-        public ReadOnlySpan<byte> GetBytes(int index)
+        public ReadOnlySpan<byte> GetBytes(int index) => GetBytes(index, out 
_);
+
+        /// <summary>
+        /// Get the collection of bytes, as a read-only span, at a given index 
in the array.
+        /// </summary>
+        /// <param name="index">Index at which to get bytes.</param>
+        /// <param name="isNull">Set to <see langword="true"/> if the value at 
the given index is null.</param>
+        /// <returns>Returns a <see cref="ReadOnlySpan{Byte}"/> 
object.</returns>
+        /// <exception cref="ArgumentOutOfRangeException">If the index is 
negative or beyond the length of the array.
+        /// </exception>
+        public ReadOnlySpan<byte> GetBytes(int index, out bool isNull)
         {
             if (index < 0 || index >= Length)
             {
                 throw new ArgumentOutOfRangeException(nameof(index));
             }
 
-            if (IsNull(index))
+            isNull = IsNull(index);
+
+            if (isNull)
             {
                 // Note that `return null;` is valid syntax, but would be 
misleading as `null` in the context of a span
                 // is actually returned as an empty span.
@@ -353,6 +366,5 @@ namespace Apache.Arrow
 
             return ValueBuffer.Span.Slice(ValueOffsets[index], 
GetValueLength(index));
         }
-
     }
 }
diff --git a/csharp/src/Apache.Arrow/Arrays/StringArray.cs 
b/csharp/src/Apache.Arrow/Arrays/StringArray.cs
index f008f56fa8..42104b2717 100644
--- a/csharp/src/Apache.Arrow/Arrays/StringArray.cs
+++ b/csharp/src/Apache.Arrow/Arrays/StringArray.cs
@@ -72,11 +72,11 @@ namespace Apache.Arrow
 
         public string GetString(int index, Encoding encoding = default)
         {
-            encoding = encoding ?? DefaultEncoding;
+            encoding ??= DefaultEncoding;
 
-            ReadOnlySpan<byte> bytes = GetBytes(index);
+            ReadOnlySpan<byte> bytes = GetBytes(index, out bool isNull);
 
-            if (bytes == default)
+            if (isNull)
             {
                 return null;
             }
diff --git a/csharp/test/Apache.Arrow.Tests/BinaryArrayBuilderTests.cs 
b/csharp/test/Apache.Arrow.Tests/BinaryArrayBuilderTests.cs
index 7f45ce8578..4c2b050d0c 100644
--- a/csharp/test/Apache.Arrow.Tests/BinaryArrayBuilderTests.cs
+++ b/csharp/test/Apache.Arrow.Tests/BinaryArrayBuilderTests.cs
@@ -481,7 +481,8 @@ namespace Apache.Arrow.Tests
             for (int i = 0; i < array.Length; i++)
             {
                 var expectedArray = expectedContentsArr[i];
-                var actualArray = array.IsNull(i) ? null : 
array.GetBytes(i).ToArray();
+                var actualSpan = array.GetBytes(i, out bool isNull);
+                var actualArray = isNull ? null : actualSpan.ToArray();
                 Assert.Equal(expectedArray, actualArray);
             }
         }
diff --git a/csharp/test/Apache.Arrow.Tests/StringArrayTests.cs 
b/csharp/test/Apache.Arrow.Tests/StringArrayTests.cs
new file mode 100644
index 0000000000..0fd3d3d105
--- /dev/null
+++ b/csharp/test/Apache.Arrow.Tests/StringArrayTests.cs
@@ -0,0 +1,54 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using Xunit;
+
+namespace Apache.Arrow.Tests
+{
+    public class StringArrayTests
+    {
+        public class GetString
+        {
+            [Theory]
+            [InlineData(null, null)]
+            [InlineData(null, "")]
+            [InlineData(null, "value")]
+            [InlineData("", null)]
+            [InlineData("", "")]
+            [InlineData("", "value")]
+            [InlineData("value", null)]
+            [InlineData("value", "")]
+            [InlineData("value", "value")]
+            public void ReturnsAppendedValue(string firstValue, string 
secondValue)
+            {
+                // Arrange
+                // Create an array with two elements. The second element being 
null,
+                // empty, or non-empty may influence the underlying BinaryArray
+                // storage such that retrieving an empty first element could 
result
+                // in an empty span or a 0-length span backed by storage.
+                var array = new StringArray.Builder()
+                    .Append(firstValue)
+                    .Append(secondValue)
+                    .Build();
+
+                // Act
+                var retrievedValue = array.GetString(0);
+
+                // Assert
+                Assert.Equal(firstValue, retrievedValue);
+            }
+        }
+    }
+}

Reply via email to