alamb commented on code in PR #7513:
URL: https://github.com/apache/arrow-rs/pull/7513#discussion_r2121242811
##########
arrow-select/src/filter.rs:
##########
@@ -337,7 +367,95 @@ impl FilterPredicate {
}
}
-fn filter_array(values: &dyn Array, predicate: &FilterPredicate) ->
Result<ArrayRef, ArrowError> {
+/// Appends the nulls from array for any filtered rows to the
`null_buffer_builder`
+///
+/// Panics if the strategy is
+/// [`IterationStrategy::All`] or [`IterationStrategy::None`], which must be
handled by the caller
+fn append_filtered_nulls(
+ null_buffer_builder: &mut NullBufferBuilder,
+ array: &dyn Array,
+ predicate: &FilterPredicate,
+) {
+ assert!(!matches!(
+ predicate.strategy,
+ IterationStrategy::All | IterationStrategy::None
+ ));
+
+ let Some(nulls) = array.nulls() else {
+ // No nulls in the source array, so it anything selected by filter
will be non null as well
+ null_buffer_builder.append_n_non_nulls(predicate.count());
+ return;
+ };
+
+ // Filter the packed bitmask `buffer`, with `predicate` starting at bit
offset `offset`
+ // TODO: maybe this could be improved to avoid materializing the temporary
buffer
+ let nulls = filter_bits(nulls.inner(), predicate);
+ let nulls = NullBuffer::from(BooleanBuffer::new(nulls, 0,
predicate.count));
+
+ null_buffer_builder.append_buffer(&nulls);
+}
+
+impl<T: ArrowPrimitiveType> ArrayBuilderExtFilter for PrimitiveBuilder<T> {
+ fn append_filtered(
+ &mut self,
+ array: &dyn Array,
+ predicate: &FilterPredicate,
+ ) -> Result<(), ArrowError> {
+ // todo reserve space for the new values
+ // self.reserve(predicate.count);
+
+ let array = array.as_primitive::<T>();
+ let values = array.values();
+
+ assert!(values.len() >= predicate.filter.len());
+
+ let (values_builder, null_buffer_builder) = self.inner_mut();
+ match &predicate.strategy {
+ IterationStrategy::SlicesIterator => {
+ append_filtered_nulls(null_buffer_builder, array, predicate);
+ for (start, end) in SlicesIterator::new(&predicate.filter) {
+ values_builder.append_slice(&values[start..end]);
+ }
+ }
+ IterationStrategy::Slices(slices) => {
+ append_filtered_nulls(null_buffer_builder, array, predicate);
+ for (start, end) in slices {
+ values_builder.append_slice(&values[*start..*end]);
+ }
+ }
+ IterationStrategy::IndexIterator => {
+ append_filtered_nulls(null_buffer_builder, array, predicate);
+ let iter =
+ IndexIterator::new(&predicate.filter,
predicate.count).map(|x| values[x]);
+ // SAFETY: IndexIterator is trusted length
+ unsafe { values_builder.append_trusted_len_iter(iter) }
Review Comment:
Great find:
- https://github.com/apache/arrow-rs/pull/7590
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]