This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new f80bc5fa8e parquet writer: Raise an error when the row_group_index overflows i16 (#6378)
f80bc5fa8e is described below

commit f80bc5fa8e6382391a6d25113a6d846e40a0b170
Author: Val Lorentz <[email protected]>
AuthorDate: Fri Sep 13 11:20:26 2024 +0200

    parquet writer: Raise an error when the row_group_index overflows i16 (#6378)
    
    This caused confusing panics down the line because 'ordinal' is
    negative.
---
 parquet/src/file/writer.rs | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/parquet/src/file/writer.rs b/parquet/src/file/writer.rs
index a23a2a3327..7b7bfa19c3 100644
--- a/parquet/src/file/writer.rs
+++ b/parquet/src/file/writer.rs
@@ -190,14 +190,25 @@ impl<W: Write + Send> SerializedFileWriter<W> {
     /// Creates new row group from this file writer.
     /// In case of IO error or Thrift error, returns `Err`.
     ///
-    /// There is no limit on a number of row groups in a file; however, row groups have
+    /// There can be at most 2^15 row groups in a file; and row groups have
     /// to be written sequentially. Every time the next row group is requested, the
     /// previous row group must be finalised and closed using `RowGroupWriter::close` method.
     pub fn next_row_group(&mut self) -> Result<SerializedRowGroupWriter<'_, W>> {
         self.assert_previous_writer_closed()?;
         let ordinal = self.row_group_index;
 
-        self.row_group_index += 1;
+        let ordinal: i16 = ordinal.try_into().map_err(|_| {
+            ParquetError::General(format!(
+                "Parquet does not support more than {} row groups per file 
(currently: {})",
+                i16::MAX,
+                ordinal
+            ))
+        })?;
+
+        self.row_group_index = self
+            .row_group_index
+            .checked_add(1)
+            .expect("SerializedFileWriter::row_group_index overflowed");
 
         let bloom_filter_position = self.properties().bloom_filter_position();
         let row_groups = &mut self.row_groups;
@@ -227,7 +238,7 @@ impl<W: Write + Send> SerializedFileWriter<W> {
             self.descr.clone(),
             self.props.clone(),
             &mut self.buf,
-            ordinal as i16,
+            ordinal,
             Some(Box::new(on_close)),
         );
         Ok(row_group_writer)

Reply via email to