This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new d11da24ad refactor: Merge similar functions `ilike_scalar` and `nilike_scalar` (#3303)
d11da24ad is described below

commit d11da24ad43c27e28d0b2340a810e4733c2162cf
Author: askoa <[email protected]>
AuthorDate: Fri Dec 9 06:47:09 2022 -0500

    refactor: Merge similar functions `ilike_scalar` and `nilike_scalar` (#3303)
    
    * merge functions ilike_scalar and nilike_scalar
    
    * Use from_unary
    
    Co-authored-by: askoa <askoa@local>
    Co-authored-by: Raphael Taylor-Davies <[email protected]>
---
 arrow-string/src/like.rs | 169 ++++++++---------------------------------------
 1 file changed, 26 insertions(+), 143 deletions(-)

diff --git a/arrow-string/src/like.rs b/arrow-string/src/like.rs
index 11d79676d..c8a4d37cd 100644
--- a/arrow-string/src/like.rs
+++ b/arrow-string/src/like.rs
@@ -18,7 +18,6 @@
 use arrow_array::builder::BooleanBufferBuilder;
 use arrow_array::cast::*;
 use arrow_array::*;
-use arrow_buffer::{bit_util, MutableBuffer};
 use arrow_data::bit_mask::combine_option_bitmap;
 use arrow_data::ArrayData;
 use arrow_schema::*;
@@ -584,66 +583,44 @@ fn ilike_dict<K: ArrowPrimitiveType>(
 }
 
 #[inline]
-fn ilike_scalar<'a, L: ArrayAccessor<Item = &'a str>>(
+fn ilike_scalar_op<'a, F: Fn(bool) -> bool, L: ArrayAccessor<Item = &'a str>>(
     left: L,
     right: &str,
+    op: F,
 ) -> Result<BooleanArray, ArrowError> {
-    let null_bit_buffer = left.data().null_buffer().cloned();
-    let bytes = bit_util::ceil(left.len(), 8);
-    let mut bool_buf = MutableBuffer::from_len_zeroed(bytes);
-    let bool_slice = bool_buf.as_slice_mut();
-
     if !right.contains(is_like_pattern) {
         // fast path, can use equals
         let right_uppercase = right.to_uppercase();
-        for i in 0..left.len() {
-            unsafe {
-                if left.value_unchecked(i).to_uppercase() == right_uppercase {
-                    bit_util::set_bit(bool_slice, i);
-                }
-            }
-        }
+
+        Ok(BooleanArray::from_unary(left, |item| {
+            op(item.to_uppercase() == right_uppercase)
+        }))
     } else if right.ends_with('%')
         && !right.ends_with("\\%")
         && !right[..right.len() - 1].contains(is_like_pattern)
     {
         // fast path, can use starts_with
         let start_str = &right[..right.len() - 1].to_uppercase();
-        for i in 0..left.len() {
-            unsafe {
-                if left
-                    .value_unchecked(i)
-                    .to_uppercase()
-                    .starts_with(start_str)
-                {
-                    bit_util::set_bit(bool_slice, i);
-                }
-            }
-        }
+        Ok(BooleanArray::from_unary(left, |item| {
+            op(item.to_uppercase().starts_with(start_str))
+        }))
     } else if right.starts_with('%') && !right[1..].contains(is_like_pattern) {
         // fast path, can use ends_with
         let ends_str = &right[1..].to_uppercase();
 
-        for i in 0..left.len() {
-            unsafe {
-                if left.value_unchecked(i).to_uppercase().ends_with(ends_str) {
-                    bit_util::set_bit(bool_slice, i);
-                }
-            }
-        }
+        Ok(BooleanArray::from_unary(left, |item| {
+            op(item.to_uppercase().ends_with(ends_str))
+        }))
     } else if right.starts_with('%')
         && right.ends_with('%')
+        && !right.ends_with("\\%")
         && !right[1..right.len() - 1].contains(is_like_pattern)
     {
         // fast path, can use contains
         let contains = &right[1..right.len() - 1].to_uppercase();
-        for i in 0..left.len() {
-            unsafe {
-                if left.value_unchecked(i).to_uppercase().contains(contains) {
-                    bit_util::set_bit(bool_slice, i);
-                }
-            }
-        }
+        Ok(BooleanArray::from_unary(left, |item| {
+            op(item.to_uppercase().contains(contains))
+        }))
     } else {
         let re_pattern = replace_like_wildcards(right)?;
         let re = Regex::new(&format!("(?i)^{}$", re_pattern)).map_err(|e| {
@@ -653,26 +630,16 @@ fn ilike_scalar<'a, L: ArrayAccessor<Item = &'a str>>(
             ))
         })?;
 
-        for i in 0..left.len() {
-            let haystack = unsafe { left.value_unchecked(i) };
-            if re.is_match(haystack) {
-                bit_util::set_bit(bool_slice, i);
-            }
-        }
-    };
+        Ok(BooleanArray::from_unary(left, |item| op(re.is_match(item))))
+    }
+}
 
-    let data = unsafe {
-        ArrayData::new_unchecked(
-            DataType::Boolean,
-            left.len(),
-            None,
-            null_bit_buffer,
-            0,
-            vec![bool_buf.into()],
-            vec![],
-        )
-    };
-    Ok(BooleanArray::from(data))
+#[inline]
+fn ilike_scalar<'a, L: ArrayAccessor<Item = &'a str>>(
+    left: L,
+    right: &str,
+) -> Result<BooleanArray, ArrowError> {
+    ilike_scalar_op(left, right, |x| x)
 }
 
 /// Perform SQL `left ILIKE right` operation on [`StringArray`] /
@@ -852,91 +819,7 @@ fn nilike_scalar<'a, L: ArrayAccessor<Item = &'a str>>(
     left: L,
     right: &str,
 ) -> Result<BooleanArray, ArrowError> {
-    let null_bit_buffer = left.data().null_buffer().cloned();
-    let bytes = bit_util::ceil(left.len(), 8);
-    let mut bool_buf = MutableBuffer::from_len_zeroed(bytes);
-    let bool_slice = bool_buf.as_slice_mut();
-
-    if !right.contains(is_like_pattern) {
-        // fast path, can use equals
-        let right_uppercase = right.to_uppercase();
-        for i in 0..left.len() {
-            unsafe {
-                if left.value_unchecked(i).to_uppercase() != right_uppercase {
-                    bit_util::set_bit(bool_slice, i);
-                }
-            }
-        }
-    } else if right.ends_with('%')
-        && !right.ends_with("\\%")
-        && !right[..right.len() - 1].contains(is_like_pattern)
-    {
-        // fast path, can use starts_with
-        let start_str = &right[..right.len() - 1].to_uppercase();
-        for i in 0..left.len() {
-            unsafe {
-                if !(left
-                    .value_unchecked(i)
-                    .to_uppercase()
-                    .starts_with(start_str))
-                {
-                    bit_util::set_bit(bool_slice, i);
-                }
-            }
-        }
-    } else if right.starts_with('%') && !right[1..].contains(is_like_pattern) {
-        // fast path, can use ends_with
-        let ends_str = &right[1..].to_uppercase();
-
-        for i in 0..left.len() {
-            unsafe {
-                if !(left.value_unchecked(i).to_uppercase().ends_with(ends_str)) {
-                    bit_util::set_bit(bool_slice, i);
-                }
-            }
-        }
-    } else if right.starts_with('%')
-        && right.ends_with('%')
-        && !right[1..right.len() - 1].contains(is_like_pattern)
-    {
-        // fast path, can use contains
-        let contains = &right[1..right.len() - 1].to_uppercase();
-        for i in 0..left.len() {
-            unsafe {
-                if !(left.value_unchecked(i).to_uppercase().contains(contains)) {
-                    bit_util::set_bit(bool_slice, i);
-                }
-            }
-        }
-    } else {
-        let re_pattern = replace_like_wildcards(right)?;
-        let re = Regex::new(&format!("(?i)^{}$", re_pattern)).map_err(|e| {
-            ArrowError::ComputeError(format!(
-                "Unable to build regex from ILIKE pattern: {}",
-                e
-            ))
-        })?;
-
-        for i in 0..left.len() {
-            let haystack = unsafe { left.value_unchecked(i) };
-            if !re.is_match(haystack) {
-                bit_util::set_bit(bool_slice, i);
-            }
-        }
-    };
-
-    let data = unsafe {
-        ArrayData::new_unchecked(
-            DataType::Boolean,
-            left.len(),
-            None,
-            null_bit_buffer,
-            0,
-            vec![bool_buf.into()],
-            vec![],
-        )
-    };
-    Ok(BooleanArray::from(data))
+    ilike_scalar_op(left, right, |x| !x)
 }
 
 /// Perform SQL `left NOT ILIKE right` operation on [`StringArray`] /

Reply via email to