This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 2d76d9a526 GH-35519: [C++][Parquet] Fixing exception handling in
parquet FileSerializer (#35520)
2d76d9a526 is described below
commit 2d76d9a526f9827283bb7dfac60715b6ad4aec34
Author: mwish <[email protected]>
AuthorDate: Tue May 16 00:32:37 2023 +0800
GH-35519: [C++][Parquet] Fixing exception handling in parquet
FileSerializer (#35520)
### Rationale for this change
Mentioned in https://github.com/apache/arrow/issues/35519
### What changes are included in this PR?
Exception handling in `CheckRowsWritten`
### Are these changes tested?
No; I don't currently know how to mock this failure scenario for testing.
### Are there any user-facing changes?
No.
* Closes: #35519
Authored-by: mwish <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
---
cpp/src/parquet/file_writer.cc | 19 ++++++++++---------
1 file changed, 10 insertions(+), 9 deletions(-)
diff --git a/cpp/src/parquet/file_writer.cc b/cpp/src/parquet/file_writer.cc
index 57067bc533..c91567e70d 100644
--- a/cpp/src/parquet/file_writer.cc
+++ b/cpp/src/parquet/file_writer.cc
@@ -219,17 +219,16 @@ class RowGroupSerializer : public
RowGroupWriter::Contents {
closed_ = true;
CheckRowsWritten();
- for (size_t i = 0; i < column_writers_.size(); i++) {
- if (column_writers_[i]) {
- total_bytes_written_ += column_writers_[i]->Close();
+ // Avoid invalid state if ColumnWriter::Close() throws internally.
+ auto column_writers = std::move(column_writers_);
+ for (size_t i = 0; i < column_writers.size(); i++) {
+ if (column_writers[i]) {
+ total_bytes_written_ += column_writers[i]->Close();
total_compressed_bytes_written_ +=
- column_writers_[i]->total_compressed_bytes_written();
- column_writers_[i].reset();
+ column_writers[i]->total_compressed_bytes_written();
}
}
- column_writers_.clear();
-
// Ensures all columns have been written
metadata_->set_num_rows(num_rows_);
metadata_->Finish(total_bytes_written_, row_group_ordinal_);
@@ -261,8 +260,10 @@ class RowGroupSerializer : public RowGroupWriter::Contents
{
}
} else if (buffered_row_group_ &&
column_writers_.size() > 0) { // when buffered_row_group = true
+ DCHECK(column_writers_[0] != nullptr);
int64_t current_col_rows = column_writers_[0]->rows_written();
for (int i = 1; i < static_cast<int>(column_writers_.size()); i++) {
+ DCHECK(column_writers_[i] != nullptr);
int64_t current_col_rows_i = column_writers_[i]->rows_written();
if (current_col_rows != current_col_rows_i) {
ThrowRowsMisMatchError(i, current_col_rows_i, current_col_rows);
@@ -380,7 +381,7 @@ class FileSerializer : public ParquetFileWriter::Contents {
void AddKeyValueMetadata(
const std::shared_ptr<const KeyValueMetadata>& key_value_metadata)
override {
if (key_value_metadata_ == nullptr) {
- key_value_metadata_ = std::move(key_value_metadata);
+ key_value_metadata_ = key_value_metadata;
} else if (key_value_metadata != nullptr) {
key_value_metadata_ = key_value_metadata_->Merge(*key_value_metadata);
}
@@ -388,7 +389,7 @@ class FileSerializer : public ParquetFileWriter::Contents {
~FileSerializer() override {
try {
- Close();
+ FileSerializer::Close();
} catch (...) {
}
}