This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 21693a4 fix a bug in variable sized equality (#1209)
21693a4 is described below
commit 21693a4362de03efa09302ab9d93d84d8d37a824
Author: Helgi Kristvin Sigurbjarnarson <[email protected]>
AuthorDate: Wed Jan 19 13:28:31 2022 -0800
fix a bug in variable sized equality (#1209)
A missing validity buffer was being treated as all values being null,
rather than all values being valid, causing equality to fail on some
equivalent string and binary arrays.
---
arrow/src/array/equal/mod.rs | 42 ++++++++++++++++++++++++++++++----
arrow/src/array/equal/variable_size.rs | 6 ++---
2 files changed, 41 insertions(+), 7 deletions(-)
diff --git a/arrow/src/array/equal/mod.rs b/arrow/src/array/equal/mod.rs
index 09ba0cd..7edf9cb 100644
--- a/arrow/src/array/equal/mod.rs
+++ b/arrow/src/array/equal/mod.rs
@@ -304,10 +304,10 @@ mod tests {
use std::sync::Arc;
use crate::array::{
- array::Array, ArrayDataBuilder, ArrayRef, BinaryOffsetSizeTrait,
BooleanArray,
- DecimalBuilder, FixedSizeBinaryBuilder, FixedSizeListBuilder,
GenericBinaryArray,
- Int32Builder, ListBuilder, NullArray, PrimitiveBuilder, StringArray,
- StringDictionaryBuilder, StringOffsetSizeTrait, StructArray,
+ array::Array, ArrayData, ArrayDataBuilder, ArrayRef,
BinaryOffsetSizeTrait,
+ BooleanArray, DecimalBuilder, FixedSizeBinaryBuilder,
FixedSizeListBuilder,
+ GenericBinaryArray, Int32Builder, ListBuilder, NullArray,
PrimitiveBuilder,
+ StringArray, StringDictionaryBuilder, StringOffsetSizeTrait,
StructArray,
};
use crate::array::{GenericStringArray, Int32Array};
use crate::buffer::Buffer;
@@ -1297,4 +1297,38 @@ mod tests {
);
test_equal(&a, &b, false);
}
+
+ #[test]
+ fn test_non_null_empty_strings() {
+ let s = StringArray::from(vec![Some(""), Some(""), Some("")]);
+
+ let string1 = s.data();
+
+ let string2 = ArrayData::builder(DataType::Utf8)
+ .len(string1.len())
+ .buffers(string1.buffers().to_vec())
+ .build()
+ .unwrap();
+
+ // string2 is identical to string1 except that it has no validity
buffer but since there
+ // are no nulls, string1 and string2 are equal
+ test_equal(string1, &string2, true);
+ }
+
+ #[test]
+ fn test_null_empty_strings() {
+ let s = StringArray::from(vec![Some(""), None, Some("")]);
+
+ let string1 = s.data();
+
+ let string2 = ArrayData::builder(DataType::Utf8)
+ .len(string1.len())
+ .buffers(string1.buffers().to_vec())
+ .build()
+ .unwrap();
+
+ // string2 is identical to string1 except that it has no validity
buffer since string1 has
+ // nulls in it, string1 and string2 are not equal
+ test_equal(string1, &string2, false);
+ }
}
diff --git a/arrow/src/array/equal/variable_size.rs
b/arrow/src/array/equal/variable_size.rs
index ecb3bc2..946f107 100644
--- a/arrow/src/array/equal/variable_size.rs
+++ b/arrow/src/array/equal/variable_size.rs
@@ -86,13 +86,13 @@ pub(super) fn variable_sized_equal<T: OffsetSizeTrait>(
let lhs_pos = lhs_start + i;
let rhs_pos = rhs_start + i;
- // the null bits can still be `None`, so we don't unwrap
+ // the null bits can still be `None`, indicating that the value is
valid.
let lhs_is_null = !lhs_nulls
.map(|v| get_bit(v.as_slice(), lhs.offset() + lhs_pos))
- .unwrap_or(false);
+ .unwrap_or(true);
let rhs_is_null = !rhs_nulls
.map(|v| get_bit(v.as_slice(), rhs.offset() + rhs_pos))
- .unwrap_or(false);
+ .unwrap_or(true);
lhs_is_null
|| (lhs_is_null == rhs_is_null)