alamb commented on code in PR #7513: URL: https://github.com/apache/arrow-rs/pull/7513#discussion_r2123818214
########## arrow-select/src/filter.rs: ########## @@ -744,23 +828,82 @@ where GenericByteArray::from(data) } +impl<T: ByteViewType> ArrayBuilderExtFilter for GenericByteViewBuilder<T> { + fn append_filtered( + &mut self, + array: &dyn Array, + predicate: &FilterPredicate, + ) -> Result<(), ArrowError> { + // todo reserve space for the new values + // self.reserve(predicate.count); + + let array = array.as_byte_view::<T>(); + let views = array.views(); + assert!(views.len() >= predicate.filter.len()); + + // Note, this is basically the same as `filter_native` -- TODO figure out how to reuse that code (maybe also for filter_primitive) + let (view_builder, null_buffer_builder) = self.inner_mut(); + let starting_view = view_builder.len(); + match &predicate.strategy { + IterationStrategy::SlicesIterator => { + append_filtered_nulls(null_buffer_builder, array, predicate); + for (start, end) in SlicesIterator::new(&predicate.filter) { + view_builder.append_slice(&views[start..end]); + } + } + IterationStrategy::Slices(slices) => { + append_filtered_nulls(null_buffer_builder, array, predicate); + for (start, end) in slices { + view_builder.append_slice(&views[*start..*end]); + } + } + IterationStrategy::IndexIterator => { + append_filtered_nulls(null_buffer_builder, array, predicate); + let iter = IndexIterator::new(&predicate.filter, predicate.count).map(|x| views[x]); + + // SAFETY: IndexIterator is trusted length + unsafe { view_builder.append_trusted_len_iter(iter) } + } + IterationStrategy::Indices(indices) => { + append_filtered_nulls(null_buffer_builder, array, predicate); + let iter = indices.iter().map(|x| views[*x]); + // Safety: Vec + Map knows its length correctly + unsafe { + view_builder.append_trusted_len_iter(iter); + } + } + IterationStrategy::All => { + // handles nulls and compaction as well + self.append_array(array); + return Ok(()); + } + IterationStrategy::None => { + return Ok(()); + } + } + + // Flush the in-progress buffer + unsafe { + // All 
views were copied from `array` + self.finalize_copied_views(starting_view, array); Review Comment: Right, but as mentioned above that will hold on to quite a bit more memory for selective filters. I have some other ideas of how to improve it. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org