hhhizzz commented on code in PR #8733:
URL: https://github.com/apache/arrow-rs/pull/8733#discussion_r2506939972


##########
parquet/src/arrow/arrow_reader/selection.rs:
##########
@@ -691,6 +736,180 @@ fn union_row_selections(left: &[RowSelector], right: &[RowSelector]) -> RowSelec
     iter.collect()
 }
 
+/// Cursor for iterating a [`RowSelection`] during execution within a
+/// [`ReadPlan`](crate::arrow::arrow_reader::ReadPlan).
+///
+/// This keeps per-reader state such as the current position and delegates the
+/// actual storage strategy to the internal `RowSelectionBacking`.
+#[derive(Debug)]
+pub struct RowSelectionCursor {
+    /// Backing storage describing how the selection is materialised
+    storage: RowSelectionBacking,
+    /// Current absolute offset into the selection
+    position: usize,
+}
+
+/// Backing storage that powers [`RowSelectionCursor`].
+///
+/// The cursor either walks a boolean mask (dense representation) or a queue
+/// of [`RowSelector`] ranges (sparse representation).
+#[derive(Debug)]
+pub enum RowSelectionBacking {
+    Mask(BooleanBuffer),
+    Selectors(VecDeque<RowSelector>),
+}
+
+/// Result of computing the next chunk to read when using a bitmap mask
+#[derive(Debug)]
+pub struct MaskChunk {
+    /// Number of leading rows to skip before reaching selected rows
+    pub initial_skip: usize,
+    /// Total rows covered by this chunk (selected + skipped)
+    pub chunk_rows: usize,
+    /// Rows actually selected within the chunk
+    pub selected_rows: usize,
+    /// Starting offset within the mask where the chunk begins
+    pub mask_start: usize,
+}
+
+impl RowSelectionCursor {
+    /// Create a cursor, choosing an efficient backing representation
+    pub(crate) fn new(selectors: Vec<RowSelector>, strategy: RowSelectionStrategy) -> Self {
+        let storage = match strategy {
+            RowSelectionStrategy::Mask => {
+                RowSelectionBacking::Mask(boolean_mask_from_selectors(&selectors))
+            }
+            RowSelectionStrategy::Selectors => RowSelectionBacking::Selectors(selectors.into()),
+        };
+
+        Self {
+            storage,
+            position: 0,
+        }
+    }
+
+    /// Returns `true` when no further rows remain
+    pub fn is_empty(&self) -> bool {
+        match &self.storage {
+            RowSelectionBacking::Mask(mask) => self.position >= mask.len(),
+            RowSelectionBacking::Selectors(selectors) => selectors.is_empty(),
+        }
+    }
+
+    /// Current position within the overall selection
+    pub fn position(&self) -> usize {
+        self.position
+    }
+
+    /// Return the next [`RowSelector`] when using the sparse representation
+    pub fn next_selector(&mut self) -> Option<RowSelector> {
+        match &mut self.storage {
+            RowSelectionBacking::Selectors(selectors) => {
+                let selector = selectors.pop_front()?;
+                self.position += selector.row_count;
+                Some(selector)

Review Comment:
   Good find! Updated



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to