This is an automated email from the ASF dual-hosted git repository.
wjones127 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 2461a16c19 Implement PartialEq for GenericBinaryArray (#6241)
2461a16c19 is described below
commit 2461a16c19ee5032531b1c05dd7e7192bc842e0f
Author: Andrew Lamb <[email protected]>
AuthorDate: Wed Aug 14 12:02:59 2024 -0400
Implement PartialEq for GenericBinaryArray (#6241)
---
arrow-array/src/array/byte_view_array.rs | 54 +++++++++++++++++++++++++++++---
1 file changed, 50 insertions(+), 4 deletions(-)
diff --git a/arrow-array/src/array/byte_view_array.rs b/arrow-array/src/array/byte_view_array.rs
index a9aed95318..42f945838a 100644
--- a/arrow-array/src/array/byte_view_array.rs
+++ b/arrow-array/src/array/byte_view_array.rs
@@ -34,13 +34,25 @@ use super::ByteArrayType;
/// [Variable-size Binary View Layout]: An array of variable length bytes view arrays.
///
-/// Different than [`crate::GenericByteArray`] as it stores both an offset and length
-/// meaning that take / filter operations can be implemented without copying the underlying data.
+/// [Variable-size Binary View Layout]: https://arrow.apache.org/docs/format/Columnar.html#variable-size-binary-view-layout
+///
+/// This is different from [`GenericByteArray`] as it stores both an offset and
+/// length meaning that take / filter operations can be implemented without
+/// copying the underlying data. In addition, it stores an inlined prefix which
+/// can be used to speed up comparisons.
+///
+/// # See Also
///
/// See [`StringViewArray`] for storing utf8 encoded string data and
/// [`BinaryViewArray`] for storing bytes.
///
-/// [Variable-size Binary View Layout]: https://arrow.apache.org/docs/format/Columnar.html#variable-size-binary-view-layout
+/// # Notes
+///
+/// Comparing two `GenericByteViewArray` using PartialEq compares by structure,
+/// not by value, as there are many different buffer layouts to represent the
+/// same data (e.g. different offsets, different buffer sizes, etc).
+///
+/// # Layout
///
/// A `GenericByteViewArray` stores variable length byte strings. An array of
/// `N` elements is stored as `N` fixed length "views" and a variable number
@@ -95,7 +107,6 @@ use super::ByteArrayType;
/// buffer 0
│...│
///
└───┘
/// ```
-/// [`GenericByteArray`]: crate::array::GenericByteArray
pub struct GenericByteViewArray<T: ByteViewType + ?Sized> {
data_type: DataType,
views: ScalarBuffer<u128>,
@@ -116,6 +127,16 @@ impl<T: ByteViewType + ?Sized> Clone for GenericByteViewArray<T> {
}
}
+// PartialEq
+impl<T: ByteViewType + ?Sized> PartialEq for GenericByteViewArray<T> {
+ fn eq(&self, other: &Self) -> bool {
+ other.data_type.eq(&self.data_type)
+ && other.views.eq(&self.views)
+ && other.buffers.eq(&self.buffers)
+ && other.nulls.eq(&self.nulls)
+ }
+}
+
impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
/// Create a new [`GenericByteViewArray`] from the provided parts, panicking on failure
///
@@ -870,4 +891,29 @@ mod tests {
check_gc(&array.slice(2, 2));
check_gc(&array.slice(3, 1));
}
+
+ #[test]
+ fn test_eq() {
+ let test_data = [
+ Some("longer than 12 bytes"),
+ None,
+ Some("short"),
+ Some("again, this is longer than 12 bytes"),
+ ];
+
+ let array1 = {
+ let mut builder =
StringViewBuilder::new().with_fixed_block_size(8);
+ test_data.into_iter().for_each(|v| builder.append_option(v));
+ builder.finish()
+ };
+ let array2 = {
+ // create a new array with the same data but different layout
+ let mut builder =
StringViewBuilder::new().with_fixed_block_size(100);
+ test_data.into_iter().for_each(|v| builder.append_option(v));
+ builder.finish()
+ };
+ assert_eq!(array1, array1.clone());
+ assert_eq!(array2, array2.clone());
+ assert_ne!(array1, array2);
+ }
}