tustvold commented on code in PR #2315:
URL: https://github.com/apache/arrow-rs/pull/2315#discussion_r939681372


##########
parquet/src/arrow/arrow_reader.rs:
##########
@@ -1126,36 +1178,49 @@ mod tests {
 
         file.rewind().unwrap();
 
-        let mut arrow_reader = ParquetFileArrowReader::try_new(file).unwrap();
+        let mut arrow_reader;
+        let expected_data: Vec<Option<T::T>>;
+        if let Some((selections, row_count)) = opts.row_selections.clone() {
+            let options =
+                
ArrowReaderOptions::new().with_row_selection(selections.clone());
+            arrow_reader =
+                ParquetFileArrowReader::try_new_with_options(file, 
options).unwrap();
+            let mut without_skip_data = gen_expected_data::<T>(&def_levels, 
&values);
+
+            let mut skip_data: Vec<Option<T::T>> = vec![];
+            for select in selections {
+                if select.skip {
+                    without_skip_data.drain(0..select.row_count);
+                } else {
+                    
skip_data.extend(without_skip_data.drain(0..select.row_count));
+                }
+            }
+            expected_data = skip_data;
+            assert_eq!(expected_data.len(), row_count);
+        } else {
+            arrow_reader = ParquetFileArrowReader::try_new(file).unwrap();
+            //get flatten table data
+            expected_data = gen_expected_data::<T>(&def_levels, &values);
+            assert_eq!(expected_data.len(), opts.num_rows * 
opts.num_row_groups);
+        }
+
         let mut record_reader = arrow_reader
             .get_record_reader(opts.record_batch_size)
             .unwrap();
 
-        let expected_data: Vec<Option<T::T>> = match def_levels {
-            Some(levels) => {
-                let mut values_iter = values.iter().flatten();
-                levels
-                    .iter()
-                    .flatten()
-                    .map(|d| match d {
-                        1 => Some(values_iter.next().cloned().unwrap()),
-                        0 => None,
-                        _ => unreachable!(),
-                    })
-                    .collect()
-            }
-            None => values.iter().flatten().map(|b| Some(b.clone())).collect(),
-        };
-
-        assert_eq!(expected_data.len(), opts.num_rows * opts.num_row_groups);
-
         let mut total_read = 0;
         loop {
             let maybe_batch = record_reader.next();
             if total_read < expected_data.len() {
-                let end = min(total_read + opts.record_batch_size, 
expected_data.len());
+                let mut end =
+                    min(total_read + opts.record_batch_size, 
expected_data.len());
                 let batch = maybe_batch.unwrap().unwrap();
-                assert_eq!(end - total_read, batch.num_rows());
+                //TODO remove this after implement 
https://github.com/apache/arrow-rs/issues/2197

Review Comment:
   The linked issue has been done?



##########
parquet/src/arrow/arrow_reader.rs:
##########
@@ -1126,36 +1178,49 @@ mod tests {
 
         file.rewind().unwrap();
 
-        let mut arrow_reader = ParquetFileArrowReader::try_new(file).unwrap();
+        let mut arrow_reader;
+        let expected_data: Vec<Option<T::T>>;
+        if let Some((selections, row_count)) = opts.row_selections.clone() {
+            let options =
+                
ArrowReaderOptions::new().with_row_selection(selections.clone());
+            arrow_reader =
+                ParquetFileArrowReader::try_new_with_options(file, 
options).unwrap();
+            let mut without_skip_data = gen_expected_data::<T>(&def_levels, 
&values);
+
+            let mut skip_data: Vec<Option<T::T>> = vec![];
+            for select in selections {
+                if select.skip {
+                    without_skip_data.drain(0..select.row_count);

Review Comment:
   Nice :ok_hand: 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to