Dandandan commented on code in PR #7513:
URL: https://github.com/apache/arrow-rs/pull/7513#discussion_r2122316118


##########
arrow-select/src/filter.rs:
##########
@@ -744,23 +828,82 @@ where
     GenericByteArray::from(data)
 }
 
+impl<T: ByteViewType> ArrayBuilderExtFilter for GenericByteViewBuilder<T> {
+    fn append_filtered(
+        &mut self,
+        array: &dyn Array,
+        predicate: &FilterPredicate,
+    ) -> Result<(), ArrowError> {
+        // todo reserve space for the new values
+        // self.reserve(predicate.count);
+
+        let array = array.as_byte_view::<T>();
+        let views = array.views();
+        assert!(views.len() >= predicate.filter.len());
+
+        // Note, this is basically the same as `filter_native` -- TODO figure 
out how to reuse that code (maybe also for filter_primitive)
+        let (view_builder, null_buffer_builder) = self.inner_mut();
+        let starting_view = view_builder.len();
+        match &predicate.strategy {
+            IterationStrategy::SlicesIterator => {
+                append_filtered_nulls(null_buffer_builder, array, predicate);
+                for (start, end) in SlicesIterator::new(&predicate.filter) {
+                    view_builder.append_slice(&views[start..end]);
+                }
+            }
+            IterationStrategy::Slices(slices) => {
+                append_filtered_nulls(null_buffer_builder, array, predicate);
+                for (start, end) in slices {
+                    view_builder.append_slice(&views[*start..*end]);
+                }
+            }
+            IterationStrategy::IndexIterator => {
+                append_filtered_nulls(null_buffer_builder, array, predicate);
+                let iter = IndexIterator::new(&predicate.filter, 
predicate.count).map(|x| views[x]);
+
+                // SAFETY: IndexIterator is trusted length
+                unsafe { view_builder.append_trusted_len_iter(iter) }
+            }
+            IterationStrategy::Indices(indices) => {
+                append_filtered_nulls(null_buffer_builder, array, predicate);
+                let iter = indices.iter().map(|x| views[*x]);
+                // Safety: Vec + Map knows its length correctly
+                unsafe {
+                    view_builder.append_trusted_len_iter(iter);
+                }
+            }
+            IterationStrategy::All => {
+                // handles nulls and compaction as well
+                self.append_array(array);
+                return Ok(());
+            }
+            IterationStrategy::None => {
+                return Ok(());
+            }
+        }
+
+        // Flush the in-progress buffer
+        unsafe {
+            // All views were coped from `array`
+            self.finalize_copied_views(starting_view, array);

Review Comment:
   it would be good to delay this until finishing the buffer (so it can 
allocate larger buffer and go through the views in one go.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to