This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 55fe847691 Return error rather than panic when too many row groups are written (#6629)
55fe847691 is described below
commit 55fe847691fad99d06576c9b39cb45ceeff2fba4
Author: Ed Seidl <[email protected]>
AuthorDate: Mon Oct 28 15:35:36 2024 -0700
Return error rather than panic when too many row groups are written (#6629)
* return error rather than panic when too many row groups
* clean up test a bit
---
parquet/src/file/writer.rs | 45 ++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 44 insertions(+), 1 deletion(-)
diff --git a/parquet/src/file/writer.rs b/parquet/src/file/writer.rs
index 95ff109a3d..b84c57a60e 100644
--- a/parquet/src/file/writer.rs
+++ b/parquet/src/file/writer.rs
@@ -378,7 +378,12 @@ fn write_bloom_filters<W: Write + Send>(
.ordinal()
.expect("Missing row group ordinal")
.try_into()
- .expect("Negative row group ordinal");
+ .map_err(|_| {
+ ParquetError::General(format!(
+ "Negative row group ordinal: {})",
+ row_group.ordinal().unwrap()
+ ))
+ })?;
let row_group_idx = row_group_idx as usize;
for (column_idx, column_chunk) in row_group.columns_mut().iter_mut().enumerate() {
if let Some(bloom_filter) = bloom_filters[row_group_idx][column_idx].take() {
@@ -1892,6 +1897,44 @@ mod tests {
assert_eq!(page_sizes[0], unenc_size);
}
+ #[test]
+ fn test_too_many_rowgroups() {
+ let message_type = "
+ message test_schema {
+ REQUIRED BYTE_ARRAY a (UTF8);
+ }
+ ";
+ let schema = Arc::new(parse_message_type(message_type).unwrap());
+ let file: File = tempfile::tempfile().unwrap();
+ let props = Arc::new(
+ WriterProperties::builder()
+ .set_statistics_enabled(EnabledStatistics::None)
+ .set_max_row_group_size(1)
+ .build(),
+ );
+ let mut writer = SerializedFileWriter::new(&file, schema, props).unwrap();
+
+ // Create 32k empty rowgroups. Should error when i == 32768.
+ for i in 0..0x8001 {
+ match writer.next_row_group() {
+ Ok(mut row_group_writer) => {
+ assert_ne!(i, 0x8000);
+ let col_writer = row_group_writer.next_column().unwrap().unwrap();
+ col_writer.close().unwrap();
+ row_group_writer.close().unwrap();
+ }
+ Err(e) => {
+ assert_eq!(i, 0x8000);
+ assert_eq!(
+ e.to_string(),
+ "Parquet error: Parquet does not support more than 32767 row groups per file (currently: 32768)"
+ );
+ }
+ }
+ }
+ writer.close().unwrap();
+ }
+
#[test]
fn test_size_statistics_with_repetition_and_nulls() {
let message_type = "