alamb commented on code in PR #7513:
URL: https://github.com/apache/arrow-rs/pull/7513#discussion_r2113872266


##########
arrow-select/src/filter.rs:
##########
@@ -337,7 +367,94 @@ impl FilterPredicate {
     }
 }
 
-fn filter_array(values: &dyn Array, predicate: &FilterPredicate) -> 
Result<ArrayRef, ArrowError> {
+/// Appends the nulls from array for any filtered rows to the 
`null_buffer_builder`
+///
+/// Panics if the strategy is
+/// [`IterationStrategy::All`] or [`IterationStrategy::None`], which must be 
handled by the caller
+fn append_filtered_nulls(
+    null_buffer_builder: &mut NullBufferBuilder,
+    array: &dyn Array,
+    predicate: &FilterPredicate,
+) {
+    assert!(!matches!(
+        predicate.strategy,
+        IterationStrategy::All | IterationStrategy::None
+    ));
+
+    let Some(nulls) = array.nulls() else {
+        // No nulls in the source array, so it anything selected by filter 
will be non null as well
+        null_buffer_builder.append_n_non_nulls(predicate.count());
+        return;
+    };
+
+    // Filter the packed bitmask `buffer`, with `predicate` starting at bit 
offset `offset`
+    // TODO: maybe this could be improved to avoid materializing the temporary 
buffer
+    let nulls = filter_bits(nulls.inner(), predicate);
+    let nulls = NullBuffer::from(BooleanBuffer::new(nulls, 0, 
predicate.count));
+
+    null_buffer_builder.append_buffer(&nulls);
+}
+
+impl<T: ArrowPrimitiveType> ArrayBuilderExtFilter for PrimitiveBuilder<T> {
+    fn append_filtered(
+        &mut self,
+        array: &dyn Array,
+        predicate: &FilterPredicate,
+    ) -> Result<(), ArrowError> {
+        // todo reserve space for the new values
+        // self.reserve(predicate.count);
+
+        let array = array.as_primitive::<T>();
+        let values = array.values();
+
+        assert!(values.len() >= predicate.filter.len());
+
+        let (values_builder, null_buffer_builder) = self.inner_mut();
+        match &predicate.strategy {
+            IterationStrategy::SlicesIterator => {
+                append_filtered_nulls(null_buffer_builder, array, predicate);
+                for (start, end) in SlicesIterator::new(&predicate.filter) {
+                    values_builder.append_slice(&values[start..end]);
+                }
+            }
+            IterationStrategy::Slices(slices) => {
+                append_filtered_nulls(null_buffer_builder, array, predicate);
+                for (start, end) in slices {
+                    values_builder.append_slice(&values[*start..*end]);
+                }
+            }
+            IterationStrategy::IndexIterator => {
+                append_filtered_nulls(null_buffer_builder, array, predicate);
+                let iter =
+                    IndexIterator::new(&predicate.filter, 
predicate.count).map(|x| values[x]);
+                // SAFETY: IndexIterator is trusted length
+                unsafe { values_builder.append_trusted_len_iter(iter) }
+            }
+            IterationStrategy::Indices(indices) => {
+                append_filtered_nulls(null_buffer_builder, array, predicate);
+                let iter = indices.iter().map(|x| values[*x]);
+                for v in iter {

Review Comment:
   In 15541d2ad3



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to