jroddev commented on code in PR #6582:
URL: https://github.com/apache/arrow-rs/pull/6582#discussion_r1806029445


##########
parquet/src/arrow/async_reader/mod.rs:
##########
@@ -2037,4 +2039,105 @@ mod tests {
         // Should only have made 3 requests
         assert_eq!(requests.lock().unwrap().len(), 3);
     }
+
+    #[tokio::test]
+    async fn empty_ofset_index_doesnt_panic_in_read_row_group() {
+        use tokio::fs::File;
+        let testdata = arrow::util::test_util::parquet_test_data();
+        let path = format!("{testdata}/alltypes_plain.parquet");
+        let mut file = File::open(&path).await.unwrap();
+        let file_size = file.metadata().await.unwrap().len();
+        let mut metadata = ParquetMetaDataReader::new()
+            .with_page_indexes(true)
+            .load_and_finish(&mut file, file_size as usize)
+            .await
+            .unwrap();
+
+        metadata.set_offset_index(Some(vec![]));
+        let options = ArrowReaderOptions::new().with_page_index(true);
+        let arrow_reader_metadata = ArrowReaderMetadata::try_new(metadata.into(), options).unwrap();
+        let reader =
+            ParquetRecordBatchStreamBuilder::new_with_metadata(file, arrow_reader_metadata)
+                .build()
+                .unwrap();
+
+        let result = reader.try_collect::<Vec<_>>().await.unwrap();
+        assert_eq!(result.len(), 1);
+    }
+
+    #[tokio::test]
+    async fn non_empty_ofset_index_doesnt_panic_in_read_row_group() {
+        use tokio::fs::File;
+        let testdata = arrow::util::test_util::parquet_test_data();
+        let path = format!("{testdata}/alltypes_tiny_pages.parquet");
+        let mut file = File::open(&path).await.unwrap();
+        let file_size = file.metadata().await.unwrap().len();
+        let metadata = ParquetMetaDataReader::new()
+            .with_page_indexes(true)
+            .load_and_finish(&mut file, file_size as usize)
+            .await
+            .unwrap();
+
+        let options = ArrowReaderOptions::new().with_page_index(true);
+        let arrow_reader_metadata = ArrowReaderMetadata::try_new(metadata.into(), options).unwrap();
+        let reader =
+            ParquetRecordBatchStreamBuilder::new_with_metadata(file, arrow_reader_metadata)
+                .build()
+                .unwrap();
+
+        let result = reader.try_collect::<Vec<_>>().await.unwrap();
+        assert_eq!(result.len(), 8);
+    }
+
+    #[tokio::test]
+    async fn empty_offset_index_doesnt_panic_in_column_chunks() {

Review Comment:
   This test aims to replicate the writing and reloading of metadata, as is done in the `external_metadata` example.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to