This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new a2a80ca292 Fix string view ILIKE checks with NULL values (#6705)
a2a80ca292 is described below

commit a2a80ca292b4db8c6867f75d6d346a1372aea432
Author: Piotr Findeisen <[email protected]>
AuthorDate: Fri Nov 8 19:14:50 2024 +0100

    Fix string view ILIKE checks with NULL values (#6705)
---
 arrow-string/src/like.rs      | 226 ++++++++++++++++++++++--------------------
 arrow-string/src/predicate.rs |  14 +--
 2 files changed, 126 insertions(+), 114 deletions(-)

diff --git a/arrow-string/src/like.rs b/arrow-string/src/like.rs
index fee3b792a9..1b04b4eb56 100644
--- a/arrow-string/src/like.rs
+++ b/arrow-string/src/like.rs
@@ -1720,33 +1720,36 @@ mod tests {
             "%a%",        // can_execute as contains("a")
             "%a%b_c_d%e", // can_execute as regular expression
         ] {
-            let a = Scalar::new(StringArray::new_null(1));
-            let b = StringArray::new_scalar(pattern);
-            let r = like(&a, &b).unwrap();
-            assert_eq!(r.len(), 1, "With pattern {pattern}");
-            assert_eq!(r.null_count(), 1, "With pattern {pattern}");
-            assert!(r.is_null(0), "With pattern {pattern}");
-
-            let a = Scalar::new(StringArray::new_null(1));
-            let b = StringArray::from_iter_values([pattern]);
-            let r = like(&a, &b).unwrap();
-            assert_eq!(r.len(), 1, "With pattern {pattern}");
-            assert_eq!(r.null_count(), 1, "With pattern {pattern}");
-            assert!(r.is_null(0), "With pattern {pattern}");
-
-            let a = StringArray::new_null(1);
-            let b = StringArray::from_iter_values([pattern]);
-            let r = like(&a, &b).unwrap();
-            assert_eq!(r.len(), 1, "With pattern {pattern}");
-            assert_eq!(r.null_count(), 1, "With pattern {pattern}");
-            assert!(r.is_null(0), "With pattern {pattern}");
-
-            let a = StringArray::new_null(1);
-            let b = StringArray::new_scalar(pattern);
-            let r = like(&a, &b).unwrap();
-            assert_eq!(r.len(), 1, "With pattern {pattern}");
-            assert_eq!(r.null_count(), 1, "With pattern {pattern}");
-            assert!(r.is_null(0), "With pattern {pattern}");
+            // These tests focus on the null handling, but are case-insensitive
+            for like_f in [like, ilike, nlike, nilike] {
+                let a = Scalar::new(StringArray::new_null(1));
+                let b = StringArray::new_scalar(pattern);
+                let r = like_f(&a, &b).unwrap();
+                assert_eq!(r.len(), 1, "With pattern {pattern}");
+                assert_eq!(r.null_count(), 1, "With pattern {pattern}");
+                assert!(r.is_null(0), "With pattern {pattern}");
+
+                let a = Scalar::new(StringArray::new_null(1));
+                let b = StringArray::from_iter_values([pattern]);
+                let r = like_f(&a, &b).unwrap();
+                assert_eq!(r.len(), 1, "With pattern {pattern}");
+                assert_eq!(r.null_count(), 1, "With pattern {pattern}");
+                assert!(r.is_null(0), "With pattern {pattern}");
+
+                let a = StringArray::new_null(1);
+                let b = StringArray::from_iter_values([pattern]);
+                let r = like_f(&a, &b).unwrap();
+                assert_eq!(r.len(), 1, "With pattern {pattern}");
+                assert_eq!(r.null_count(), 1, "With pattern {pattern}");
+                assert!(r.is_null(0), "With pattern {pattern}");
+
+                let a = StringArray::new_null(1);
+                let b = StringArray::new_scalar(pattern);
+                let r = like_f(&a, &b).unwrap();
+                assert_eq!(r.len(), 1, "With pattern {pattern}");
+                assert_eq!(r.null_count(), 1, "With pattern {pattern}");
+                assert!(r.is_null(0), "With pattern {pattern}");
+            }
         }
     }
 
@@ -1763,95 +1766,102 @@ mod tests {
             "%a%",        // can_execute as contains("a")
             "%a%b_c_d%e", // can_execute as regular expression
         ] {
-            let a = Scalar::new(StringViewArray::new_null(1));
-            let b = StringViewArray::new_scalar(pattern);
-            let r = like(&a, &b).unwrap();
-            assert_eq!(r.len(), 1, "With pattern {pattern}");
-            assert_eq!(r.null_count(), 1, "With pattern {pattern}");
-            assert!(r.is_null(0), "With pattern {pattern}");
-
-            let a = Scalar::new(StringViewArray::new_null(1));
-            let b = StringViewArray::from_iter_values([pattern]);
-            let r = like(&a, &b).unwrap();
-            assert_eq!(r.len(), 1, "With pattern {pattern}");
-            assert_eq!(r.null_count(), 1, "With pattern {pattern}");
-            assert!(r.is_null(0), "With pattern {pattern}");
-
-            let a = StringViewArray::new_null(1);
-            let b = StringViewArray::from_iter_values([pattern]);
-            let r = like(&a, &b).unwrap();
-            assert_eq!(r.len(), 1, "With pattern {pattern}");
-            assert_eq!(r.null_count(), 1, "With pattern {pattern}");
-            assert!(r.is_null(0), "With pattern {pattern}");
-
-            let a = StringViewArray::new_null(1);
-            let b = StringViewArray::new_scalar(pattern);
-            let r = like(&a, &b).unwrap();
-            assert_eq!(r.len(), 1, "With pattern {pattern}");
-            assert_eq!(r.null_count(), 1, "With pattern {pattern}");
-            assert!(r.is_null(0), "With pattern {pattern}");
+            // These tests focus on the null handling, but are case-insensitive
+            for like_f in [like, ilike, nlike, nilike] {
+                let a = Scalar::new(StringViewArray::new_null(1));
+                let b = StringViewArray::new_scalar(pattern);
+                let r = like_f(&a, &b).unwrap();
+                assert_eq!(r.len(), 1, "With pattern {pattern}");
+                assert_eq!(r.null_count(), 1, "With pattern {pattern}");
+                assert!(r.is_null(0), "With pattern {pattern}");
+
+                let a = Scalar::new(StringViewArray::new_null(1));
+                let b = StringViewArray::from_iter_values([pattern]);
+                let r = like_f(&a, &b).unwrap();
+                assert_eq!(r.len(), 1, "With pattern {pattern}");
+                assert_eq!(r.null_count(), 1, "With pattern {pattern}");
+                assert!(r.is_null(0), "With pattern {pattern}");
+
+                let a = StringViewArray::new_null(1);
+                let b = StringViewArray::from_iter_values([pattern]);
+                let r = like_f(&a, &b).unwrap();
+                assert_eq!(r.len(), 1, "With pattern {pattern}");
+                assert_eq!(r.null_count(), 1, "With pattern {pattern}");
+                assert!(r.is_null(0), "With pattern {pattern}");
+
+                let a = StringViewArray::new_null(1);
+                let b = StringViewArray::new_scalar(pattern);
+                let r = like_f(&a, &b).unwrap();
+                assert_eq!(r.len(), 1, "With pattern {pattern}");
+                assert_eq!(r.null_count(), 1, "With pattern {pattern}");
+                assert!(r.is_null(0), "With pattern {pattern}");
+            }
         }
     }
 
     #[test]
     fn string_like_scalar_null() {
-        let a = StringArray::new_scalar("a");
-        let b = Scalar::new(StringArray::new_null(1));
-        let r = like(&a, &b).unwrap();
-        assert_eq!(r.len(), 1);
-        assert_eq!(r.null_count(), 1);
-        assert!(r.is_null(0));
-
-        let a = StringArray::from_iter_values(["a"]);
-        let b = Scalar::new(StringArray::new_null(1));
-        let r = like(&a, &b).unwrap();
-        assert_eq!(r.len(), 1);
-        assert_eq!(r.null_count(), 1);
-        assert!(r.is_null(0));
-
-        let a = StringArray::from_iter_values(["a"]);
-        let b = StringArray::new_null(1);
-        let r = like(&a, &b).unwrap();
-        assert_eq!(r.len(), 1);
-        assert_eq!(r.null_count(), 1);
-        assert!(r.is_null(0));
-
-        let a = StringArray::new_scalar("a");
-        let b = StringArray::new_null(1);
-        let r = like(&a, &b).unwrap();
-        assert_eq!(r.len(), 1);
-        assert_eq!(r.null_count(), 1);
-        assert!(r.is_null(0));
+        for like_f in [like, ilike, nlike, nilike] {
+            let a = StringArray::new_scalar("a");
+            let b = Scalar::new(StringArray::new_null(1));
+            let r = like_f(&a, &b).unwrap();
+            assert_eq!(r.len(), 1);
+            assert_eq!(r.null_count(), 1);
+            assert!(r.is_null(0));
+
+            let a = StringArray::from_iter_values(["a"]);
+            let b = Scalar::new(StringArray::new_null(1));
+            let r = like_f(&a, &b).unwrap();
+            assert_eq!(r.len(), 1);
+            assert_eq!(r.null_count(), 1);
+            assert!(r.is_null(0));
+
+            let a = StringArray::from_iter_values(["a"]);
+            let b = StringArray::new_null(1);
+            let r = like_f(&a, &b).unwrap();
+            assert_eq!(r.len(), 1);
+            assert_eq!(r.null_count(), 1);
+            assert!(r.is_null(0));
+
+            let a = StringArray::new_scalar("a");
+            let b = StringArray::new_null(1);
+            let r = like_f(&a, &b).unwrap();
+            assert_eq!(r.len(), 1);
+            assert_eq!(r.null_count(), 1);
+            assert!(r.is_null(0));
+        }
     }
 
     #[test]
     fn string_view_like_scalar_null() {
-        let a = StringViewArray::new_scalar("a");
-        let b = Scalar::new(StringViewArray::new_null(1));
-        let r = like(&a, &b).unwrap();
-        assert_eq!(r.len(), 1);
-        assert_eq!(r.null_count(), 1);
-        assert!(r.is_null(0));
-
-        let a = StringViewArray::from_iter_values(["a"]);
-        let b = Scalar::new(StringViewArray::new_null(1));
-        let r = like(&a, &b).unwrap();
-        assert_eq!(r.len(), 1);
-        assert_eq!(r.null_count(), 1);
-        assert!(r.is_null(0));
-
-        let a = StringViewArray::from_iter_values(["a"]);
-        let b = StringViewArray::new_null(1);
-        let r = like(&a, &b).unwrap();
-        assert_eq!(r.len(), 1);
-        assert_eq!(r.null_count(), 1);
-        assert!(r.is_null(0));
-
-        let a = StringViewArray::new_scalar("a");
-        let b = StringViewArray::new_null(1);
-        let r = like(&a, &b).unwrap();
-        assert_eq!(r.len(), 1);
-        assert_eq!(r.null_count(), 1);
-        assert!(r.is_null(0));
+        for like_f in [like, ilike, nlike, nilike] {
+            let a = StringViewArray::new_scalar("a");
+            let b = Scalar::new(StringViewArray::new_null(1));
+            let r = like_f(&a, &b).unwrap();
+            assert_eq!(r.len(), 1);
+            assert_eq!(r.null_count(), 1);
+            assert!(r.is_null(0));
+
+            let a = StringViewArray::from_iter_values(["a"]);
+            let b = Scalar::new(StringViewArray::new_null(1));
+            let r = like_f(&a, &b).unwrap();
+            assert_eq!(r.len(), 1);
+            assert_eq!(r.null_count(), 1);
+            assert!(r.is_null(0));
+
+            let a = StringViewArray::from_iter_values(["a"]);
+            let b = StringViewArray::new_null(1);
+            let r = like_f(&a, &b).unwrap();
+            assert_eq!(r.len(), 1);
+            assert_eq!(r.null_count(), 1);
+            assert!(r.is_null(0));
+
+            let a = StringViewArray::new_scalar("a");
+            let b = StringViewArray::new_null(1);
+            let r = like_f(&a, &b).unwrap();
+            assert_eq!(r.len(), 1);
+            assert_eq!(r.null_count(), 1);
+            assert!(r.is_null(0));
+        }
     }
 }
diff --git a/arrow-string/src/predicate.rs b/arrow-string/src/predicate.rs
index 1d141e6b0b..ae2493692d 100644
--- a/arrow-string/src/predicate.rs
+++ b/arrow-string/src/predicate.rs
@@ -138,8 +138,8 @@ impl<'a> Predicate<'a> {
             }
             Predicate::IStartsWithAscii(v) => {
                 if let Some(string_view_array) = 
array.as_any().downcast_ref::<StringViewArray>() {
-                    // TODO respect null buffer
-                    BooleanArray::from(
+                    let nulls = string_view_array.logical_nulls();
+                    let values = BooleanBuffer::from(
                         string_view_array
                             .prefix_bytes_iter(v.len())
                             .map(|haystack| {
@@ -150,7 +150,8 @@ impl<'a> Predicate<'a> {
                                 ) != negate
                             })
                             .collect::<Vec<_>>(),
-                    )
+                    );
+                    BooleanArray::new(values, nulls)
                 } else {
                     BooleanArray::from_unary(array, |haystack| {
                         starts_with(haystack, v, 
equals_ignore_ascii_case_kernel) != negate
@@ -177,8 +178,8 @@ impl<'a> Predicate<'a> {
             }
             Predicate::IEndsWithAscii(v) => {
                 if let Some(string_view_array) = 
array.as_any().downcast_ref::<StringViewArray>() {
-                    // TODO respect null buffer
-                    BooleanArray::from(
+                    let nulls = string_view_array.logical_nulls();
+                    let values = BooleanBuffer::from(
                         string_view_array
                             .suffix_bytes_iter(v.len())
                             .map(|haystack| {
@@ -189,7 +190,8 @@ impl<'a> Predicate<'a> {
                                 ) != negate
                             })
                             .collect::<Vec<_>>(),
-                    )
+                    );
+                    BooleanArray::new(values, nulls)
                 } else {
                     BooleanArray::from_unary(array, |haystack| {
                         ends_with(haystack, v, 
equals_ignore_ascii_case_kernel) != negate

Reply via email to