alamb commented on code in PR #6629:
URL: https://github.com/apache/arrow-rs/pull/6629#discussion_r1820725065


##########
parquet/src/file/writer.rs:
##########
@@ -1892,6 +1897,44 @@ mod tests {
         assert_eq!(page_sizes[0], unenc_size);
     }
 
+    #[test]
+    fn test_too_many_rowgroups() {
+        let message_type = "
+            message test_schema {
+                REQUIRED BYTE_ARRAY a (UTF8);
+            }
+        ";
+        let schema = Arc::new(parse_message_type(message_type).unwrap());
+        let file: File = tempfile::tempfile().unwrap();
+        let props = Arc::new(
+            WriterProperties::builder()
+                .set_statistics_enabled(EnabledStatistics::None)
+                .set_max_row_group_size(1)
+                .build(),
+        );
+        let mut writer = SerializedFileWriter::new(&file, schema, props).unwrap();
+
+        // Create 32k empty rowgroups. Should error when i == 32768.
+        for i in 0..0x8001 {
+            match writer.next_row_group() {
+                Ok(mut row_group_writer) => {
+                    assert_ne!(i, 0x8000);
+                    let col_writer = row_group_writer.next_column().unwrap().unwrap();
+                    col_writer.close().unwrap();
+                    row_group_writer.close().unwrap();
+                }
+                Err(e) => {
+                    assert_eq!(i, 0x8000);
+                    assert_eq!(
+                        e.to_string(),
+                        "Parquet error: Parquet does not support more than 32767 row groups per file (currently: 32768)"

Review Comment:
   💯 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to