alamb commented on code in PR #6118:
URL: https://github.com/apache/arrow-rs/pull/6118#discussion_r1693931753
##########
arrow-string/src/predicate.rs:
##########
@@ -128,14 +130,32 @@ impl<'a> Predicate<'a> {
}
}
-fn starts_with_ignore_ascii_case(haystack: &str, needle: &str) -> bool {
- let end = haystack.len().min(needle.len());
- haystack.is_char_boundary(end) &&
needle.eq_ignore_ascii_case(&haystack[..end])
+fn starts_with(haystack: &str, needle: &str, byte_eq_kernel: impl Fn((&u8,
&u8)) -> bool) -> bool {
+ if needle.len() > haystack.len() {
+ false
+ } else {
+ std::iter::zip(haystack.as_bytes(),
needle.as_bytes()).all(byte_eq_kernel)
+ }
+}
+
+fn ends_with(haystack: &str, needle: &str, byte_eq_kernel: impl Fn((&u8, &u8))
-> bool) -> bool {
Review Comment:
```suggestion
/// This is faster than `str::ends_with` for small strings. See
/// https://github.com/apache/arrow-rs/issues/6107
fn ends_with(haystack: &str, needle: &str, byte_eq_kernel: impl Fn((&u8,
&u8)) -> bool) -> bool {
```
##########
arrow-string/src/predicate.rs:
##########
@@ -128,14 +130,32 @@ impl<'a> Predicate<'a> {
}
}
-fn starts_with_ignore_ascii_case(haystack: &str, needle: &str) -> bool {
- let end = haystack.len().min(needle.len());
- haystack.is_char_boundary(end) &&
needle.eq_ignore_ascii_case(&haystack[..end])
+fn starts_with(haystack: &str, needle: &str, byte_eq_kernel: impl Fn((&u8,
&u8)) -> bool) -> bool {
Review Comment:
I think it might be worth leaving a comment here explaining that this
implementation is faster than `str::starts_with` for shorter strings, with a
pointer to the context
```suggestion
/// This is faster than `str::starts_with` for small strings. See
/// https://github.com/apache/arrow-rs/issues/6107
fn starts_with(haystack: &str, needle: &str, byte_eq_kernel: impl Fn((&u8,
&u8)) -> bool) -> bool {
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]