This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new a2a80ca292 Fix string view ILIKE checks with NULL values (#6705)
a2a80ca292 is described below
commit a2a80ca292b4db8c6867f75d6d346a1372aea432
Author: Piotr Findeisen <[email protected]>
AuthorDate: Fri Nov 8 19:14:50 2024 +0100
Fix string view ILIKE checks with NULL values (#6705)
---
arrow-string/src/like.rs | 226 ++++++++++++++++++++++--------------------
arrow-string/src/predicate.rs | 14 +--
2 files changed, 126 insertions(+), 114 deletions(-)
diff --git a/arrow-string/src/like.rs b/arrow-string/src/like.rs
index fee3b792a9..1b04b4eb56 100644
--- a/arrow-string/src/like.rs
+++ b/arrow-string/src/like.rs
@@ -1720,33 +1720,36 @@ mod tests {
"%a%", // can_execute as contains("a")
"%a%b_c_d%e", // can_execute as regular expression
] {
- let a = Scalar::new(StringArray::new_null(1));
- let b = StringArray::new_scalar(pattern);
- let r = like(&a, &b).unwrap();
- assert_eq!(r.len(), 1, "With pattern {pattern}");
- assert_eq!(r.null_count(), 1, "With pattern {pattern}");
- assert!(r.is_null(0), "With pattern {pattern}");
-
- let a = Scalar::new(StringArray::new_null(1));
- let b = StringArray::from_iter_values([pattern]);
- let r = like(&a, &b).unwrap();
- assert_eq!(r.len(), 1, "With pattern {pattern}");
- assert_eq!(r.null_count(), 1, "With pattern {pattern}");
- assert!(r.is_null(0), "With pattern {pattern}");
-
- let a = StringArray::new_null(1);
- let b = StringArray::from_iter_values([pattern]);
- let r = like(&a, &b).unwrap();
- assert_eq!(r.len(), 1, "With pattern {pattern}");
- assert_eq!(r.null_count(), 1, "With pattern {pattern}");
- assert!(r.is_null(0), "With pattern {pattern}");
-
- let a = StringArray::new_null(1);
- let b = StringArray::new_scalar(pattern);
- let r = like(&a, &b).unwrap();
- assert_eq!(r.len(), 1, "With pattern {pattern}");
- assert_eq!(r.null_count(), 1, "With pattern {pattern}");
- assert!(r.is_null(0), "With pattern {pattern}");
+ // These tests focus on the null handling, but are case-insensitive
+ for like_f in [like, ilike, nlike, nilike] {
+ let a = Scalar::new(StringArray::new_null(1));
+ let b = StringArray::new_scalar(pattern);
+ let r = like_f(&a, &b).unwrap();
+ assert_eq!(r.len(), 1, "With pattern {pattern}");
+ assert_eq!(r.null_count(), 1, "With pattern {pattern}");
+ assert!(r.is_null(0), "With pattern {pattern}");
+
+ let a = Scalar::new(StringArray::new_null(1));
+ let b = StringArray::from_iter_values([pattern]);
+ let r = like_f(&a, &b).unwrap();
+ assert_eq!(r.len(), 1, "With pattern {pattern}");
+ assert_eq!(r.null_count(), 1, "With pattern {pattern}");
+ assert!(r.is_null(0), "With pattern {pattern}");
+
+ let a = StringArray::new_null(1);
+ let b = StringArray::from_iter_values([pattern]);
+ let r = like_f(&a, &b).unwrap();
+ assert_eq!(r.len(), 1, "With pattern {pattern}");
+ assert_eq!(r.null_count(), 1, "With pattern {pattern}");
+ assert!(r.is_null(0), "With pattern {pattern}");
+
+ let a = StringArray::new_null(1);
+ let b = StringArray::new_scalar(pattern);
+ let r = like_f(&a, &b).unwrap();
+ assert_eq!(r.len(), 1, "With pattern {pattern}");
+ assert_eq!(r.null_count(), 1, "With pattern {pattern}");
+ assert!(r.is_null(0), "With pattern {pattern}");
+ }
}
}
@@ -1763,95 +1766,102 @@ mod tests {
"%a%", // can_execute as contains("a")
"%a%b_c_d%e", // can_execute as regular expression
] {
- let a = Scalar::new(StringViewArray::new_null(1));
- let b = StringViewArray::new_scalar(pattern);
- let r = like(&a, &b).unwrap();
- assert_eq!(r.len(), 1, "With pattern {pattern}");
- assert_eq!(r.null_count(), 1, "With pattern {pattern}");
- assert!(r.is_null(0), "With pattern {pattern}");
-
- let a = Scalar::new(StringViewArray::new_null(1));
- let b = StringViewArray::from_iter_values([pattern]);
- let r = like(&a, &b).unwrap();
- assert_eq!(r.len(), 1, "With pattern {pattern}");
- assert_eq!(r.null_count(), 1, "With pattern {pattern}");
- assert!(r.is_null(0), "With pattern {pattern}");
-
- let a = StringViewArray::new_null(1);
- let b = StringViewArray::from_iter_values([pattern]);
- let r = like(&a, &b).unwrap();
- assert_eq!(r.len(), 1, "With pattern {pattern}");
- assert_eq!(r.null_count(), 1, "With pattern {pattern}");
- assert!(r.is_null(0), "With pattern {pattern}");
-
- let a = StringViewArray::new_null(1);
- let b = StringViewArray::new_scalar(pattern);
- let r = like(&a, &b).unwrap();
- assert_eq!(r.len(), 1, "With pattern {pattern}");
- assert_eq!(r.null_count(), 1, "With pattern {pattern}");
- assert!(r.is_null(0), "With pattern {pattern}");
+ // These tests focus on the null handling, but are case-insensitive
+ for like_f in [like, ilike, nlike, nilike] {
+ let a = Scalar::new(StringViewArray::new_null(1));
+ let b = StringViewArray::new_scalar(pattern);
+ let r = like_f(&a, &b).unwrap();
+ assert_eq!(r.len(), 1, "With pattern {pattern}");
+ assert_eq!(r.null_count(), 1, "With pattern {pattern}");
+ assert!(r.is_null(0), "With pattern {pattern}");
+
+ let a = Scalar::new(StringViewArray::new_null(1));
+ let b = StringViewArray::from_iter_values([pattern]);
+ let r = like_f(&a, &b).unwrap();
+ assert_eq!(r.len(), 1, "With pattern {pattern}");
+ assert_eq!(r.null_count(), 1, "With pattern {pattern}");
+ assert!(r.is_null(0), "With pattern {pattern}");
+
+ let a = StringViewArray::new_null(1);
+ let b = StringViewArray::from_iter_values([pattern]);
+ let r = like_f(&a, &b).unwrap();
+ assert_eq!(r.len(), 1, "With pattern {pattern}");
+ assert_eq!(r.null_count(), 1, "With pattern {pattern}");
+ assert!(r.is_null(0), "With pattern {pattern}");
+
+ let a = StringViewArray::new_null(1);
+ let b = StringViewArray::new_scalar(pattern);
+ let r = like_f(&a, &b).unwrap();
+ assert_eq!(r.len(), 1, "With pattern {pattern}");
+ assert_eq!(r.null_count(), 1, "With pattern {pattern}");
+ assert!(r.is_null(0), "With pattern {pattern}");
+ }
}
}
#[test]
fn string_like_scalar_null() {
- let a = StringArray::new_scalar("a");
- let b = Scalar::new(StringArray::new_null(1));
- let r = like(&a, &b).unwrap();
- assert_eq!(r.len(), 1);
- assert_eq!(r.null_count(), 1);
- assert!(r.is_null(0));
-
- let a = StringArray::from_iter_values(["a"]);
- let b = Scalar::new(StringArray::new_null(1));
- let r = like(&a, &b).unwrap();
- assert_eq!(r.len(), 1);
- assert_eq!(r.null_count(), 1);
- assert!(r.is_null(0));
-
- let a = StringArray::from_iter_values(["a"]);
- let b = StringArray::new_null(1);
- let r = like(&a, &b).unwrap();
- assert_eq!(r.len(), 1);
- assert_eq!(r.null_count(), 1);
- assert!(r.is_null(0));
-
- let a = StringArray::new_scalar("a");
- let b = StringArray::new_null(1);
- let r = like(&a, &b).unwrap();
- assert_eq!(r.len(), 1);
- assert_eq!(r.null_count(), 1);
- assert!(r.is_null(0));
+ for like_f in [like, ilike, nlike, nilike] {
+ let a = StringArray::new_scalar("a");
+ let b = Scalar::new(StringArray::new_null(1));
+ let r = like_f(&a, &b).unwrap();
+ assert_eq!(r.len(), 1);
+ assert_eq!(r.null_count(), 1);
+ assert!(r.is_null(0));
+
+ let a = StringArray::from_iter_values(["a"]);
+ let b = Scalar::new(StringArray::new_null(1));
+ let r = like_f(&a, &b).unwrap();
+ assert_eq!(r.len(), 1);
+ assert_eq!(r.null_count(), 1);
+ assert!(r.is_null(0));
+
+ let a = StringArray::from_iter_values(["a"]);
+ let b = StringArray::new_null(1);
+ let r = like_f(&a, &b).unwrap();
+ assert_eq!(r.len(), 1);
+ assert_eq!(r.null_count(), 1);
+ assert!(r.is_null(0));
+
+ let a = StringArray::new_scalar("a");
+ let b = StringArray::new_null(1);
+ let r = like_f(&a, &b).unwrap();
+ assert_eq!(r.len(), 1);
+ assert_eq!(r.null_count(), 1);
+ assert!(r.is_null(0));
+ }
}
#[test]
fn string_view_like_scalar_null() {
- let a = StringViewArray::new_scalar("a");
- let b = Scalar::new(StringViewArray::new_null(1));
- let r = like(&a, &b).unwrap();
- assert_eq!(r.len(), 1);
- assert_eq!(r.null_count(), 1);
- assert!(r.is_null(0));
-
- let a = StringViewArray::from_iter_values(["a"]);
- let b = Scalar::new(StringViewArray::new_null(1));
- let r = like(&a, &b).unwrap();
- assert_eq!(r.len(), 1);
- assert_eq!(r.null_count(), 1);
- assert!(r.is_null(0));
-
- let a = StringViewArray::from_iter_values(["a"]);
- let b = StringViewArray::new_null(1);
- let r = like(&a, &b).unwrap();
- assert_eq!(r.len(), 1);
- assert_eq!(r.null_count(), 1);
- assert!(r.is_null(0));
-
- let a = StringViewArray::new_scalar("a");
- let b = StringViewArray::new_null(1);
- let r = like(&a, &b).unwrap();
- assert_eq!(r.len(), 1);
- assert_eq!(r.null_count(), 1);
- assert!(r.is_null(0));
+ for like_f in [like, ilike, nlike, nilike] {
+ let a = StringViewArray::new_scalar("a");
+ let b = Scalar::new(StringViewArray::new_null(1));
+ let r = like_f(&a, &b).unwrap();
+ assert_eq!(r.len(), 1);
+ assert_eq!(r.null_count(), 1);
+ assert!(r.is_null(0));
+
+ let a = StringViewArray::from_iter_values(["a"]);
+ let b = Scalar::new(StringViewArray::new_null(1));
+ let r = like_f(&a, &b).unwrap();
+ assert_eq!(r.len(), 1);
+ assert_eq!(r.null_count(), 1);
+ assert!(r.is_null(0));
+
+ let a = StringViewArray::from_iter_values(["a"]);
+ let b = StringViewArray::new_null(1);
+ let r = like_f(&a, &b).unwrap();
+ assert_eq!(r.len(), 1);
+ assert_eq!(r.null_count(), 1);
+ assert!(r.is_null(0));
+
+ let a = StringViewArray::new_scalar("a");
+ let b = StringViewArray::new_null(1);
+ let r = like_f(&a, &b).unwrap();
+ assert_eq!(r.len(), 1);
+ assert_eq!(r.null_count(), 1);
+ assert!(r.is_null(0));
+ }
}
}
diff --git a/arrow-string/src/predicate.rs b/arrow-string/src/predicate.rs
index 1d141e6b0b..ae2493692d 100644
--- a/arrow-string/src/predicate.rs
+++ b/arrow-string/src/predicate.rs
@@ -138,8 +138,8 @@ impl<'a> Predicate<'a> {
}
Predicate::IStartsWithAscii(v) => {
if let Some(string_view_array) =
array.as_any().downcast_ref::<StringViewArray>() {
- // TODO respect null buffer
- BooleanArray::from(
+ let nulls = string_view_array.logical_nulls();
+ let values = BooleanBuffer::from(
string_view_array
.prefix_bytes_iter(v.len())
.map(|haystack| {
@@ -150,7 +150,8 @@ impl<'a> Predicate<'a> {
) != negate
})
.collect::<Vec<_>>(),
- )
+ );
+ BooleanArray::new(values, nulls)
} else {
BooleanArray::from_unary(array, |haystack| {
starts_with(haystack, v,
equals_ignore_ascii_case_kernel) != negate
@@ -177,8 +178,8 @@ impl<'a> Predicate<'a> {
}
Predicate::IEndsWithAscii(v) => {
if let Some(string_view_array) =
array.as_any().downcast_ref::<StringViewArray>() {
- // TODO respect null buffer
- BooleanArray::from(
+ let nulls = string_view_array.logical_nulls();
+ let values = BooleanBuffer::from(
string_view_array
.suffix_bytes_iter(v.len())
.map(|haystack| {
@@ -189,7 +190,8 @@ impl<'a> Predicate<'a> {
) != negate
})
.collect::<Vec<_>>(),
- )
+ );
+ BooleanArray::new(values, nulls)
} else {
BooleanArray::from_unary(array, |haystack| {
ends_with(haystack, v,
equals_ignore_ascii_case_kernel) != negate