This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new f80bc5fa8e parquet writer: Raise an error when the row_group_index overflows i16 (#6378)
f80bc5fa8e is described below
commit f80bc5fa8e6382391a6d25113a6d846e40a0b170
Author: Val Lorentz <[email protected]>
AuthorDate: Fri Sep 13 11:20:26 2024 +0200
parquet writer: Raise an error when the row_group_index overflows i16 (#6378)
This caused confusing panics down the line because 'ordinal' is
negative.
---
parquet/src/file/writer.rs | 17 ++++++++++++++---
1 file changed, 14 insertions(+), 3 deletions(-)
diff --git a/parquet/src/file/writer.rs b/parquet/src/file/writer.rs
index a23a2a3327..7b7bfa19c3 100644
--- a/parquet/src/file/writer.rs
+++ b/parquet/src/file/writer.rs
@@ -190,14 +190,25 @@ impl<W: Write + Send> SerializedFileWriter<W> {
/// Creates new row group from this file writer.
/// In case of IO error or Thrift error, returns `Err`.
///
- /// There is no limit on a number of row groups in a file; however, row groups have
+ /// There can be at most 2^15 row groups in a file; and row groups have
/// to be written sequentially. Every time the next row group is requested, the
/// previous row group must be finalised and closed using `RowGroupWriter::close` method.
pub fn next_row_group(&mut self) -> Result<SerializedRowGroupWriter<'_, W>> {
self.assert_previous_writer_closed()?;
let ordinal = self.row_group_index;
- self.row_group_index += 1;
+ let ordinal: i16 = ordinal.try_into().map_err(|_| {
+ ParquetError::General(format!(
+ "Parquet does not support more than {} row groups per file (currently: {})",
+ i16::MAX,
+ ordinal
+ ))
+ })?;
+
+ self.row_group_index = self
+ .row_group_index
+ .checked_add(1)
+ .expect("SerializedFileWriter::row_group_index overflowed");
let bloom_filter_position = self.properties().bloom_filter_position();
let row_groups = &mut self.row_groups;
@@ -227,7 +238,7 @@ impl<W: Write + Send> SerializedFileWriter<W> {
self.descr.clone(),
self.props.clone(),
&mut self.buf,
- ordinal as i16,
+ ordinal,
Some(Box::new(on_close)),
);
Ok(row_group_writer)