This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 38764c26d Make skip_records in complex_object_array can skip cross row groups (#2332)
38764c26d is described below

commit 38764c26db511ea13538042f229e817562f02f74
Author: Yang Jiang <[email protected]>
AuthorDate: Sun Aug 7 02:23:46 2022 +0800

    Make skip_records in complex_object_array can skip cross row groups (#2332)
    
    * Make skip_records in complex_object_array can skip cross row groups
    
    * Update parquet/src/arrow/array_reader/complex_object_array.rs
    
    Co-authored-by: Kun Liu <[email protected]>
    
    Co-authored-by: Kun Liu <[email protected]>
---
 .../src/arrow/array_reader/complex_object_array.rs | 27 +++++++++++++---------
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/parquet/src/arrow/array_reader/complex_object_array.rs b/parquet/src/arrow/array_reader/complex_object_array.rs
index 79b537331..4f958fea4 100644
--- a/parquet/src/arrow/array_reader/complex_object_array.rs
+++ b/parquet/src/arrow/array_reader/complex_object_array.rs
@@ -197,19 +197,24 @@ where
     }
 
     fn skip_records(&mut self, num_records: usize) -> Result<usize> {
-        match self.column_reader.as_mut() {
-            Some(reader) => reader.skip_records(num_records),
-            None => {
-                if self.next_column_reader()? {
-                    self.column_reader
-                        .as_mut()
-                        .unwrap()
-                        .skip_records(num_records)
-                } else {
-                    Ok(0)
-                }
+        let mut num_read = 0;
+        while (self.column_reader.is_some() || self.next_column_reader()?)
+            && num_read < num_records
+        {
+            let remain_to_skip = num_records - num_read;
+            let skip = self
+                .column_reader
+                .as_mut()
+                .unwrap()
+                .skip_records(remain_to_skip)?;
+            num_read += skip;
+            //  skip < remain_to_skip means end of row group
+            //  self.next_column_reader() == false means end of file
+            if skip < remain_to_skip && !self.next_column_reader()? {
+                break;
             }
         }
+        Ok(num_read)
     }
 
     fn get_def_levels(&self) -> Option<&[i16]> {

Reply via email to