This is an automated email from the ASF dual-hosted git repository.
dheres pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 096751f049 Add additional coverage for StringViewArray comparisons
(#9257)
096751f049 is described below
commit 096751f04960513089bc4124fd14d8a78d10b697
Author: Andrew Lamb <[email protected]>
AuthorDate: Sat Jan 24 07:55:35 2026 -0500
Add additional coverage for StringViewArray comparisons (#9257)
# Which issue does this PR close?
- Follow on to https://github.com/apache/arrow-rs/pull/9250
# Rationale for this change
While (retroactively) reviewing
https://github.com/apache/arrow-rs/pull/9250 from @Dandandan and
@zhuqi-lucas I noticed that some of the special case branches are not
covered.
# What changes are included in this PR?
Add some more tests to cover all the special cases
# Are these changes tested?
Yes, only tests
# Are there any user-facing changes?
<!--
If there are user-facing changes then we may require documentation to be
updated before approving the PR.
If there are any breaking changes to public APIs, please call them out.
-->
---
arrow-ord/src/cmp.rs | 110 +++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 110 insertions(+)
diff --git a/arrow-ord/src/cmp.rs b/arrow-ord/src/cmp.rs
index 18e7e9e8fb..e4d8498276 100644
--- a/arrow-ord/src/cmp.rs
+++ b/arrow-ord/src/cmp.rs
@@ -706,6 +706,7 @@ mod tests {
use std::sync::Arc;
use arrow_array::{DictionaryArray, Int32Array, Scalar, StringArray};
+ use arrow_buffer::{Buffer, ScalarBuffer};
use super::*;
@@ -936,6 +937,115 @@ mod tests {
);
}
+ #[test]
+ fn test_string_view_eq_prefix_mismatch() {
+ // Prefix mismatch should short-circuit equality for long values.
+ let a =
+ arrow_array::StringViewArray::from(vec![Some("very long apple exceeding 12 bytes")]);
+ let b =
+ arrow_array::StringViewArray::from(vec![Some("very long banana exceeding 12 bytes")]);
+ assert_eq!(eq(&a, &b).unwrap(), BooleanArray::from(vec![Some(false)]));
+ }
+
+ #[test]
+ fn test_string_view_lt_prefix_mismatch() {
+ // Prefix mismatch should decide ordering without full compare for long values.
+ let a =
+ arrow_array::StringViewArray::from(vec![Some("apple long string exceeding 12 bytes")]);
+ let b =
+ arrow_array::StringViewArray::from(vec![Some("banana long string exceeding 12 bytes")]);
+ assert_eq!(lt(&a, &b).unwrap(), BooleanArray::from(vec![true]));
+ }
+
+ #[test]
+ fn test_string_view_eq_inline_fast_path() {
+ // Inline-only arrays should compare by view equality fast path.
+ let a = arrow_array::StringViewArray::from(vec![Some("ab")]);
+ let b = arrow_array::StringViewArray::from(vec![Some("ab")]);
+ assert!(!has_buffers(&a));
+ assert!(!has_buffers(&b));
+ assert_eq!(eq(&a, &b).unwrap(), BooleanArray::from(vec![Some(true)]));
+ }
+
+ #[test]
+ fn test_string_view_eq_inline_prefix_mismatch_with_buffers() {
+ // Non-empty buffers force the prefix mismatch branch for inline values.
+ let a = arrow_array::StringViewArray::from(vec![
+ Some("ab"),
+ Some("long string to allocate buffers"),
+ ]);
+ let b = arrow_array::StringViewArray::from(vec![
+ Some("ac"),
+ Some("long string to allocate buffers"),
+ ]);
+ assert!(has_buffers(&a));
+ assert!(has_buffers(&b));
+ assert_eq!(
+ eq(&a, &b).unwrap(),
+ BooleanArray::from(vec![Some(false), Some(true)])
+ );
+ }
+
+ #[test]
+ fn test_string_view_eq_empty_len_branch() {
+ // Reach the zero-length branch by bypassing the inline fast path with a dummy buffer.
+ let raw_a = 0u128;
+ let raw_b = 1u128 << 96;
+ let views_a = ScalarBuffer::from(vec![raw_a]);
+ let views_b = ScalarBuffer::from(vec![raw_b]);
+ let buffers: Arc<[Buffer]> = Arc::from([Buffer::from_slice_ref([0u8])]);
+ let a =
+ unsafe { arrow_array::StringViewArray::new_unchecked(views_a, buffers.clone(), None) };
+ let b = unsafe { arrow_array::StringViewArray::new_unchecked(views_b, buffers, None) };
+ assert!(has_buffers(&a));
+ assert!(has_buffers(&b));
+ assert!(<&arrow_array::StringViewArray as ArrayOrd>::is_eq(
+ (&a, 0),
+ (&b, 0)
+ ));
+ }
+
+ #[test]
+ fn test_string_view_long_prefix_mismatch_array_ord() {
+ // Long strings with differing prefixes should short-circuit on prefix ordering.
+ let a =
+ arrow_array::StringViewArray::from(vec![Some("apple long string exceeding 12 bytes")]);
+ let b =
+ arrow_array::StringViewArray::from(vec![Some("banana long string exceeding 12 bytes")]);
+ assert!(has_buffers(&a));
+ assert!(has_buffers(&b));
+ assert!(<&arrow_array::StringViewArray as ArrayOrd>::is_lt(
+ (&a, 0),
+ (&b, 0)
+ ));
+ }
+
+ #[test]
+ fn test_string_view_inline_mismatch_array_ord() {
+ // Inline-only strings (no data buffers) that differ should still be ordered correctly.
+ let a = arrow_array::StringViewArray::from(vec![Some("ap")]);
+ let b = arrow_array::StringViewArray::from(vec![Some("ba")]);
+ assert!(!has_buffers(&a));
+ assert!(!has_buffers(&b));
+ assert!(<&arrow_array::StringViewArray as ArrayOrd>::is_lt(
+ (&a, 0),
+ (&b, 0)
+ ));
+ }
+ #[test]
+ fn test_compare_byte_view_inline_fast_path() {
+ // Inline-only views should compare via inline key in compare_byte_view.
+ let a = arrow_array::StringViewArray::from(vec![Some("ab")]);
+ let b = arrow_array::StringViewArray::from(vec![Some("ac")]);
+ assert!(!has_buffers(&a));
+ assert!(!has_buffers(&b));
+ assert_eq!(compare_byte_view(&a, 0, &b, 0), Ordering::Less);
+ }
+
+ fn has_buffers<T: ByteViewType>(array: &GenericByteViewArray<T>) -> bool {
+ !array.data_buffers().is_empty()
+ }
+
#[test]
fn test_compare_byte_view() {
let a = arrow_array::StringViewArray::from(vec![