alamb commented on code in PR #9755:
URL: https://github.com/apache/arrow-rs/pull/9755#discussion_r3348373045
##########
arrow-select/src/filter.rs:
##########
@@ -366,6 +366,66 @@ impl IterationStrategy {
}
}
+/// Borrowed description of which rows a [`FilterPredicate`] selects.
+pub(crate) enum FilterSelection<'a> {
+ None,
+ All { len: usize },
+ Slices(FilterSlices<'a>),
+ Indices(FilterIndices<'a>),
+}
+
+pub(crate) type FilterSlices<'a> =
+ FilterIterator<std::iter::Copied<std::slice::Iter<'a, (usize, usize)>>,
SlicesIterator<'a>>;
+
+pub(crate) type FilterIndices<'a> =
+ FilterIterator<std::iter::Copied<std::slice::Iter<'a, usize>>,
IndexIterator<'a>>;
+
+/// Holds either materialized rows or a lazy iterator.
+///
+/// This does not implement [`Iterator`] on purpose. Callers use
Review Comment:
👍
##########
arrow-select/src/coalesce/primitive.rs:
##########
@@ -106,3 +153,44 @@ impl<T: ArrowPrimitiveType + Debug> InProgressArray for InProgressPrimitiveArray
Ok(Arc::new(array))
}
}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use crate::filter::FilterBuilder;
+ use arrow_array::types::Int32Type;
+ use arrow_array::{BooleanArray, Int32Array};
+
+ #[test]
+ fn test_copy_rows_by_filter_index_iterator() {
Review Comment:
I think it would be better if this test were written in terms of the public API
(`BatchCoalescer`), but I think we (I) can do that in a follow-on PR.
##########
arrow-select/src/coalesce.rs:
##########
@@ -238,10 +238,7 @@ impl BatchCoalescer {
batch: RecordBatch,
filter: &BooleanArray,
) -> Result<(), ArrowError> {
- // TODO: optimize this to avoid materializing (copying the results
Review Comment:
🎉
##########
arrow-select/src/filter.rs:
##########
@@ -366,6 +366,66 @@ impl IterationStrategy {
}
}
+/// Borrowed description of which rows a [`FilterPredicate`] selects.
Review Comment:
💯
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]