This is an automated email from the ASF dual-hosted git repository.

dheres pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 24f455efec Undo run end filter performance regression (#6691)
24f455efec is described below

commit 24f455efec6d94c9a70ce18499a25e1ecbb6c4be
Author: delamarch3 <[email protected]>
AuthorDate: Sun Nov 10 18:17:35 2024 +0000

    Undo run end filter performance regression (#6691)
    
    * ensure predicate and values have the same length before passing on to filter_run_end_array
    
    * fix error wording
    
    * have filter_run_end_array use filter array with run_ends max value size
    
    * use skip and take to iterate over filter values in filter_run_end_array
    
    * check array values in max_value_gt_predicate_len test
    
    * run end filter performance regression
    
    * use names consistent with other functions
    
    * clippy
---
 arrow-select/src/filter.rs | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/arrow-select/src/filter.rs b/arrow-select/src/filter.rs
index 451b044859..4c6a5c0668 100644
--- a/arrow-select/src/filter.rs
+++ b/arrow-select/src/filter.rs
@@ -423,30 +423,30 @@ fn filter_array(values: &dyn Array, predicate: &FilterPredicate) -> Result<Array
 
 /// Filter any supported [`RunArray`] based on a [`FilterPredicate`]
 fn filter_run_end_array<R: RunEndIndexType>(
-    re_arr: &RunArray<R>,
-    pred: &FilterPredicate,
+    array: &RunArray<R>,
+    predicate: &FilterPredicate,
 ) -> Result<RunArray<R>, ArrowError>
 where
     R::Native: Into<i64> + From<bool>,
     R::Native: AddAssign,
 {
-    let run_ends: &RunEndBuffer<R::Native> = re_arr.run_ends();
+    let run_ends: &RunEndBuffer<R::Native> = array.run_ends();
     let mut values_filter = BooleanBufferBuilder::new(run_ends.len());
     let mut new_run_ends = vec![R::default_value(); run_ends.len()];
 
-    let mut start = 0i64;
+    let mut start = 0u64;
     let mut i = 0;
     let mut count = R::default_value();
-    let filter_values = pred.filter.values();
+    let filter_values = predicate.filter.values();
 
-    for end in run_ends.inner().into_iter().map(|i| (*i).into()) {
+    for mut end in run_ends.inner().into_iter().map(|i| (*i).into() as u64) {
         let mut keep = false;
 
-        for pred in filter_values
-            .iter()
-            .skip(start as usize)
-            .take((end - start) as usize)
-        {
+        let difference = end.saturating_sub(filter_values.len() as u64);
+        end -= difference;
+
+        // Safety: we subtract the difference off `end` so we are always within bounds
+        for pred in (start..end).map(|i| unsafe { filter_values.value_unchecked(i as usize) }) {
             count += R::Native::from(pred);
             keep |= pred
         }
@@ -465,7 +465,7 @@ where
         new_run_ends.clear();
     }
 
-    let values = re_arr.values();
+    let values = array.values();
     let pred = BooleanArray::new(values_filter.finish(), None);
     let values = filter(&values, &pred)?;
 

Reply via email to