This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new b2f0c65e1 Fix ilike_utf8_scalar kernels (#2545)
b2f0c65e1 is described below
commit b2f0c65e15a40c7f2f01e1d96335ea8ec362f65d
Author: Vrishabh <[email protected]>
AuthorDate: Tue Aug 23 16:10:22 2022 +0530
Fix ilike_utf8_scalar kernels (#2545)
* fix ilike kernels
* minor refactor for perf improvements
* Remove wrongly added file
---
arrow/src/compute/kernels/comparison.rs | 50 +++++++++++++--------------------
1 file changed, 19 insertions(+), 31 deletions(-)
diff --git a/arrow/src/compute/kernels/comparison.rs b/arrow/src/compute/kernels/comparison.rs
index 39828b64f..714b4b1b7 100644
--- a/arrow/src/compute/kernels/comparison.rs
+++ b/arrow/src/compute/kernels/comparison.rs
@@ -467,29 +467,24 @@ pub fn ilike_utf8_scalar<OffsetSize: OffsetSizeTrait>(
if !right.contains(is_like_pattern) {
// fast path, can use equals
+ let right_uppercase = right.to_uppercase();
for i in 0..left.len() {
- result.append(left.value(i) == right);
+ result.append(left.value(i).to_uppercase() == right_uppercase);
}
} else if right.ends_with('%')
&& !right.ends_with("\\%")
&& !right[..right.len() - 1].contains(is_like_pattern)
{
- // fast path, can use ends_with
+ // fast path, can use starts_with
+ let start_str = &right[..right.len() - 1].to_uppercase();
for i in 0..left.len() {
- result.append(
- left.value(i)
- .to_uppercase()
- .starts_with(&right[..right.len() - 1].to_uppercase()),
- );
+ result.append(left.value(i).to_uppercase().starts_with(start_str));
}
} else if right.starts_with('%') && !right[1..].contains(is_like_pattern) {
- // fast path, can use starts_with
+ // fast path, can use ends_with
+ let ends_str = &right[1..].to_uppercase();
for i in 0..left.len() {
- result.append(
- left.value(i)
- .to_uppercase()
- .ends_with(&right[1..].to_uppercase()),
- );
+ result.append(left.value(i).to_uppercase().ends_with(ends_str));
}
} else {
let re_pattern = replace_like_wildcards(right)?;
@@ -550,31 +545,24 @@ pub fn nilike_utf8_scalar<OffsetSize: OffsetSizeTrait>(
if !right.contains(is_like_pattern) {
// fast path, can use equals
+ let right_uppercase = right.to_uppercase();
for i in 0..left.len() {
- result.append(left.value(i) != right);
+ result.append(left.value(i).to_uppercase() != right_uppercase);
}
} else if right.ends_with('%')
&& !right.ends_with("\\%")
&& !right[..right.len() - 1].contains(is_like_pattern)
{
- // fast path, can use ends_with
+ // fast path, can use starts_with
+ let start_str = &right[..right.len() - 1].to_uppercase();
for i in 0..left.len() {
- result.append(
- !left
- .value(i)
- .to_uppercase()
- .starts_with(&right[..right.len() - 1].to_uppercase()),
- );
+ result.append(!left.value(i).to_uppercase().starts_with(start_str));
}
} else if right.starts_with('%') && !right[1..].contains(is_like_pattern) {
- // fast path, can use starts_with
+ // fast path, can use ends_with
+ let end_str = &right[1..].to_uppercase();
for i in 0..left.len() {
- result.append(
- !left
- .value(i)
- .to_uppercase()
- .ends_with(&right[1..].to_uppercase()),
- );
+ result.append(!left.value(i).to_uppercase().ends_with(end_str));
}
} else {
let re_pattern = replace_like_wildcards(right)?;
@@ -4181,7 +4169,7 @@ mod tests {
test_utf8_scalar!(
test_utf8_array_ilike_scalar_equals,
vec!["arrow", "parrow", "arrows", "arr"],
- "arrow",
+ "Arrow",
ilike_utf8_scalar,
vec![true, false, false, false]
);
@@ -4234,8 +4222,8 @@ mod tests {
test_utf8_scalar!(
test_utf8_array_nilike_scalar_equals,
- vec!["arrow", "parrow", "arrows", "arr"],
- "arrow",
+ vec!["arRow", "parrow", "arrows", "arr"],
+ "Arrow",
nilike_utf8_scalar,
vec![false, true, true, true]
);