alamb commented on code in PR #6118:
URL: https://github.com/apache/arrow-rs/pull/6118#discussion_r1693931753


##########
arrow-string/src/predicate.rs:
##########
@@ -128,14 +130,32 @@ impl<'a> Predicate<'a> {
     }
 }
 
-fn starts_with_ignore_ascii_case(haystack: &str, needle: &str) -> bool {
-    let end = haystack.len().min(needle.len());
-    haystack.is_char_boundary(end) && 
needle.eq_ignore_ascii_case(&haystack[..end])
+fn starts_with(haystack: &str, needle: &str, byte_eq_kernel: impl Fn((&u8, 
&u8)) -> bool) -> bool {
+    if needle.len() > haystack.len() {
+        false
+    } else {
+        std::iter::zip(haystack.as_bytes(), 
needle.as_bytes()).all(byte_eq_kernel)
+    }
+}
+
+fn ends_with(haystack: &str, needle: &str, byte_eq_kernel: impl Fn((&u8, &u8)) 
-> bool) -> bool {

Review Comment:
   ```suggestion
   /// This is faster than `str::ends_with` for small strings. See 
   /// https://github.com/apache/arrow-rs/issues/6107
   fn ends_with(haystack: &str, needle: &str, byte_eq_kernel: impl Fn((&u8, 
&u8)) -> bool) -> bool {
   ```



##########
arrow-string/src/predicate.rs:
##########
@@ -128,14 +130,32 @@ impl<'a> Predicate<'a> {
     }
 }
 
-fn starts_with_ignore_ascii_case(haystack: &str, needle: &str) -> bool {
-    let end = haystack.len().min(needle.len());
-    haystack.is_char_boundary(end) && 
needle.eq_ignore_ascii_case(&haystack[..end])
+fn starts_with(haystack: &str, needle: &str, byte_eq_kernel: impl Fn((&u8, 
&u8)) -> bool) -> bool {

Review Comment:
   I think it might be worth leaving some comments here explaining that this 
implementation is faster than `str::starts_with` for shorter strings, with a 
pointer to the context
   
   ```suggestion
   /// This is faster than `str::starts_with` for small strings. See 
   /// https://github.com/apache/arrow-rs/issues/6107
   fn starts_with(haystack: &str, needle: &str, byte_eq_kernel: impl Fn((&u8, 
&u8)) -> bool) -> bool {
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to