This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 21693a4  fix a bug in variable sized equality (#1209)
21693a4 is described below

commit 21693a4362de03efa09302ab9d93d84d8d37a824
Author: Helgi Kristvin Sigurbjarnarson <[email protected]>
AuthorDate: Wed Jan 19 13:28:31 2022 -0800

    fix a bug in variable sized equality (#1209)
    
    A missing validity buffer was being treated as all values being null,
    rather than all values being valid, causing equality to fail on some
    equivalent string and binary arrays.
---
 arrow/src/array/equal/mod.rs           | 42 ++++++++++++++++++++++++++++++----
 arrow/src/array/equal/variable_size.rs |  6 ++---
 2 files changed, 41 insertions(+), 7 deletions(-)

diff --git a/arrow/src/array/equal/mod.rs b/arrow/src/array/equal/mod.rs
index 09ba0cd..7edf9cb 100644
--- a/arrow/src/array/equal/mod.rs
+++ b/arrow/src/array/equal/mod.rs
@@ -304,10 +304,10 @@ mod tests {
     use std::sync::Arc;
 
     use crate::array::{
-        array::Array, ArrayDataBuilder, ArrayRef, BinaryOffsetSizeTrait, 
BooleanArray,
-        DecimalBuilder, FixedSizeBinaryBuilder, FixedSizeListBuilder, 
GenericBinaryArray,
-        Int32Builder, ListBuilder, NullArray, PrimitiveBuilder, StringArray,
-        StringDictionaryBuilder, StringOffsetSizeTrait, StructArray,
+        array::Array, ArrayData, ArrayDataBuilder, ArrayRef, 
BinaryOffsetSizeTrait,
+        BooleanArray, DecimalBuilder, FixedSizeBinaryBuilder, 
FixedSizeListBuilder,
+        GenericBinaryArray, Int32Builder, ListBuilder, NullArray, 
PrimitiveBuilder,
+        StringArray, StringDictionaryBuilder, StringOffsetSizeTrait, 
StructArray,
     };
     use crate::array::{GenericStringArray, Int32Array};
     use crate::buffer::Buffer;
@@ -1297,4 +1297,38 @@ mod tests {
         );
         test_equal(&a, &b, false);
     }
+
+    #[test]
+    fn test_non_null_empty_strings() {
+        let s = StringArray::from(vec![Some(""), Some(""), Some("")]);
+
+        let string1 = s.data();
+
+        let string2 = ArrayData::builder(DataType::Utf8)
+            .len(string1.len())
+            .buffers(string1.buffers().to_vec())
+            .build()
+            .unwrap();
+
+        // string2 is identical to string1 except that it has no validity 
buffer but since there
+        // are no nulls, string1 and string2 are equal
+        test_equal(string1, &string2, true);
+    }
+
+    #[test]
+    fn test_null_empty_strings() {
+        let s = StringArray::from(vec![Some(""), None, Some("")]);
+
+        let string1 = s.data();
+
+        let string2 = ArrayData::builder(DataType::Utf8)
+            .len(string1.len())
+            .buffers(string1.buffers().to_vec())
+            .build()
+            .unwrap();
+
+        // string2 is identical to string1 except that it has no validity 
buffer since string1 has
+        // nulls in it, string1 and string2 are not equal
+        test_equal(string1, &string2, false);
+    }
 }
diff --git a/arrow/src/array/equal/variable_size.rs 
b/arrow/src/array/equal/variable_size.rs
index ecb3bc2..946f107 100644
--- a/arrow/src/array/equal/variable_size.rs
+++ b/arrow/src/array/equal/variable_size.rs
@@ -86,13 +86,13 @@ pub(super) fn variable_sized_equal<T: OffsetSizeTrait>(
             let lhs_pos = lhs_start + i;
             let rhs_pos = rhs_start + i;
 
-            // the null bits can still be `None`, so we don't unwrap
+            // the null bits can still be `None`, indicating that the value is 
valid.
             let lhs_is_null = !lhs_nulls
                 .map(|v| get_bit(v.as_slice(), lhs.offset() + lhs_pos))
-                .unwrap_or(false);
+                .unwrap_or(true);
             let rhs_is_null = !rhs_nulls
                 .map(|v| get_bit(v.as_slice(), rhs.offset() + rhs_pos))
-                .unwrap_or(false);
+                .unwrap_or(true);
 
             lhs_is_null
                 || (lhs_is_null == rhs_is_null)

Reply via email to