Dandandan commented on code in PR #7513: URL: https://github.com/apache/arrow-rs/pull/7513#discussion_r2122316118
########## arrow-select/src/filter.rs: ########## @@ -744,23 +828,82 @@ where GenericByteArray::from(data) } +impl<T: ByteViewType> ArrayBuilderExtFilter for GenericByteViewBuilder<T> { + fn append_filtered( + &mut self, + array: &dyn Array, + predicate: &FilterPredicate, + ) -> Result<(), ArrowError> { + // todo reserve space for the new values + // self.reserve(predicate.count); + + let array = array.as_byte_view::<T>(); + let views = array.views(); + assert!(views.len() >= predicate.filter.len()); + + // Note, this is basically the same as `filter_native` -- TODO figure out how to reuse that code (maybe also for filter_primitive) + let (view_builder, null_buffer_builder) = self.inner_mut(); + let starting_view = view_builder.len(); + match &predicate.strategy { + IterationStrategy::SlicesIterator => { + append_filtered_nulls(null_buffer_builder, array, predicate); + for (start, end) in SlicesIterator::new(&predicate.filter) { + view_builder.append_slice(&views[start..end]); + } + } + IterationStrategy::Slices(slices) => { + append_filtered_nulls(null_buffer_builder, array, predicate); + for (start, end) in slices { + view_builder.append_slice(&views[*start..*end]); + } + } + IterationStrategy::IndexIterator => { + append_filtered_nulls(null_buffer_builder, array, predicate); + let iter = IndexIterator::new(&predicate.filter, predicate.count).map(|x| views[x]); + + // SAFETY: IndexIterator is trusted length + unsafe { view_builder.append_trusted_len_iter(iter) } + } + IterationStrategy::Indices(indices) => { + append_filtered_nulls(null_buffer_builder, array, predicate); + let iter = indices.iter().map(|x| views[*x]); + // Safety: Vec + Map knows its length correctly + unsafe { + view_builder.append_trusted_len_iter(iter); + } + } + IterationStrategy::All => { + // handles nulls and compaction as well + self.append_array(array); + return Ok(()); + } + IterationStrategy::None => { + return Ok(()); + } + } + + // Flush the in-progress buffer + unsafe { + // All views were coped from `array` + self.finalize_copied_views(starting_view, array); Review Comment: it would be good to delay this until finishing the buffer (so it can allocate larger buffer and go through the views in one go. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org