This is an automated email from the ASF dual-hosted git repository.
dheres pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 24f455efec Undo run end filter performance regression (#6691)
24f455efec is described below
commit 24f455efec6d94c9a70ce18499a25e1ecbb6c4be
Author: delamarch3 <[email protected]>
AuthorDate: Sun Nov 10 18:17:35 2024 +0000
Undo run end filter performance regression (#6691)
* ensure predicate and values have the same length before passing on to
filter_run_end_array
* fix error wording
* have filter_run_end_array use filter array with run_ends max value size
* use skip and take to iterate over filter values in filter_run_end_array
* check array values in max_value_gt_predicate_len test
* run end filter performance regression
* use names consistent with other functions
* clippy
---
arrow-select/src/filter.rs | 24 ++++++++++++------------
1 file changed, 12 insertions(+), 12 deletions(-)
diff --git a/arrow-select/src/filter.rs b/arrow-select/src/filter.rs
index 451b044859..4c6a5c0668 100644
--- a/arrow-select/src/filter.rs
+++ b/arrow-select/src/filter.rs
@@ -423,30 +423,30 @@ fn filter_array(values: &dyn Array, predicate: &FilterPredicate) -> Result<Array
/// Filter any supported [`RunArray`] based on a [`FilterPredicate`]
fn filter_run_end_array<R: RunEndIndexType>(
- re_arr: &RunArray<R>,
- pred: &FilterPredicate,
+ array: &RunArray<R>,
+ predicate: &FilterPredicate,
) -> Result<RunArray<R>, ArrowError>
where
R::Native: Into<i64> + From<bool>,
R::Native: AddAssign,
{
- let run_ends: &RunEndBuffer<R::Native> = re_arr.run_ends();
+ let run_ends: &RunEndBuffer<R::Native> = array.run_ends();
let mut values_filter = BooleanBufferBuilder::new(run_ends.len());
let mut new_run_ends = vec![R::default_value(); run_ends.len()];
- let mut start = 0i64;
+ let mut start = 0u64;
let mut i = 0;
let mut count = R::default_value();
- let filter_values = pred.filter.values();
+ let filter_values = predicate.filter.values();
- for end in run_ends.inner().into_iter().map(|i| (*i).into()) {
+ for mut end in run_ends.inner().into_iter().map(|i| (*i).into() as u64) {
let mut keep = false;
- for pred in filter_values
- .iter()
- .skip(start as usize)
- .take((end - start) as usize)
- {
+ let difference = end.saturating_sub(filter_values.len() as u64);
+ end -= difference;
+
+ // Safety: we subtract the difference off `end` so we are always within bounds
+ for pred in (start..end).map(|i| unsafe { filter_values.value_unchecked(i as usize) }) {
count += R::Native::from(pred);
keep |= pred
}
@@ -465,7 +465,7 @@ where
new_run_ends.clear();
}
- let values = re_arr.values();
+ let values = array.values();
let pred = BooleanArray::new(values_filter.finish(), None);
let values = filter(&values, &pred)?;