This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 55fe847691 Return error rather than panic when too many row groups are written (#6629)
55fe847691 is described below

commit 55fe847691fad99d06576c9b39cb45ceeff2fba4
Author: Ed Seidl <[email protected]>
AuthorDate: Mon Oct 28 15:35:36 2024 -0700

    Return error rather than panic when too many row groups are written (#6629)
    
    * return error rather than panic when too many row groups
    
    * clean up test a bit
---
 parquet/src/file/writer.rs | 45 ++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 44 insertions(+), 1 deletion(-)

diff --git a/parquet/src/file/writer.rs b/parquet/src/file/writer.rs
index 95ff109a3d..b84c57a60e 100644
--- a/parquet/src/file/writer.rs
+++ b/parquet/src/file/writer.rs
@@ -378,7 +378,12 @@ fn write_bloom_filters<W: Write + Send>(
         .ordinal()
         .expect("Missing row group ordinal")
         .try_into()
-        .expect("Negative row group ordinal");
+        .map_err(|_| {
+            ParquetError::General(format!(
+                "Negative row group ordinal: {})",
+                row_group.ordinal().unwrap()
+            ))
+        })?;
     let row_group_idx = row_group_idx as usize;
     for (column_idx, column_chunk) in row_group.columns_mut().iter_mut().enumerate() {
         if let Some(bloom_filter) = bloom_filters[row_group_idx][column_idx].take() {
@@ -1892,6 +1897,44 @@ mod tests {
         assert_eq!(page_sizes[0], unenc_size);
     }
 
+    #[test]
+    fn test_too_many_rowgroups() {
+        let message_type = "
+            message test_schema {
+                REQUIRED BYTE_ARRAY a (UTF8);
+            }
+        ";
+        let schema = Arc::new(parse_message_type(message_type).unwrap());
+        let file: File = tempfile::tempfile().unwrap();
+        let props = Arc::new(
+            WriterProperties::builder()
+                .set_statistics_enabled(EnabledStatistics::None)
+                .set_max_row_group_size(1)
+                .build(),
+        );
+        let mut writer = SerializedFileWriter::new(&file, schema, props).unwrap();
+
+        // Create 32k empty rowgroups. Should error when i == 32768.
+        for i in 0..0x8001 {
+            match writer.next_row_group() {
+                Ok(mut row_group_writer) => {
+                    assert_ne!(i, 0x8000);
+                    let col_writer = row_group_writer.next_column().unwrap().unwrap();
+                    col_writer.close().unwrap();
+                    row_group_writer.close().unwrap();
+                }
+                Err(e) => {
+                    assert_eq!(i, 0x8000);
+                    assert_eq!(
+                        e.to_string(),
+                        "Parquet error: Parquet does not support more than 32767 row groups per file (currently: 32768)"
+                    );
+                }
+            }
+        }
+        writer.close().unwrap();
+    }
+
     #[test]
     fn test_size_statistics_with_repetition_and_nulls() {
         let message_type = "

Reply via email to