Dandandan commented on code in PR #7537:
URL: https://github.com/apache/arrow-rs/pull/7537#discussion_r2105229592


##########
parquet/src/arrow/arrow_reader/read_plan.rs:
##########
@@ -131,13 +129,101 @@ impl ReadPlanBuilder {
             selection,
         } = self;
 
-        let selection = selection.map(|s| s.trim().into());
+        // If the batch size is 0, read "all rows"
+        if batch_size == 0 {
+            return ReadPlan::All { batch_size: 0 };
+        }
+
+        // If no selection is provided, read all rows
+        let Some(selection) = selection else {
+            return ReadPlan::All { batch_size };
+        };
+
+        let iterator = SelectionIterator::new(batch_size, selection.into());
+        ReadPlan::Subset { iterator }
+    }
+}
+
+/// Incrementally returns [`RowSelector`]s that describe reading from a Parquet file.
+///
+/// The returned stream of [`RowSelector`]s is guaranteed to have:
+/// 1. No empty selections (that select no rows)
+/// 2. No selections that span batch_size boundaries
+/// 3. No trailing skip selections
+///
+/// For example, if the `batch_size` is 100 and we are selecting all 200 rows
+/// from a Parquet file, the selectors will be:
+/// - `RowSelector::select(100)  <-- forced break at batch_size boundary`
+/// - `RowSelector::select(100)`
+#[derive(Debug, Clone)]
+pub(crate) struct SelectionIterator {
+    /// how many rows to read in each batch
+    batch_size: usize,
+    /// how many records have been read by RowSelection in the "current" batch
+    read_records: usize,
+    /// Input selectors to read from
+    input_selectors: VecDeque<RowSelector>,

Review Comment:
   I think a plain `Vec` could be used here instead of a `VecDeque`, by tracking an index to the next selector rather than popping from the front?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to