This is an automated email from the ASF dual-hosted git repository.

raulcd pushed a commit to branch maint-12.0.x
in repository https://gitbox.apache.org/repos/asf/arrow.git

commit 05d82b4ea8ffcb38f9a6b6ce80e890a3de0b4b65
Author: mwish <[email protected]>
AuthorDate: Tue May 16 00:32:37 2023 +0800

    GH-35519: [C++][Parquet] Fixing exception handling in parquet FileSerializer (#35520)
    
    
    
    ### Rationale for this change
    
    Mentioned in https://github.com/apache/arrow/issues/35519
    
    ### What changes are included in this PR?
    
    Exception handling in `CheckRowsWritten`
    
    ### Are these changes tested?
    
    No, I don't know how to mock it currently
    
    ### Are there any user-facing changes?
    
    no
    
    * Closes: #35519
    
    Authored-by: mwish <[email protected]>
    Signed-off-by: Antoine Pitrou <[email protected]>
---
 cpp/src/parquet/file_writer.cc | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/cpp/src/parquet/file_writer.cc b/cpp/src/parquet/file_writer.cc
index f1098e4a74..913c99e091 100644
--- a/cpp/src/parquet/file_writer.cc
+++ b/cpp/src/parquet/file_writer.cc
@@ -219,17 +219,16 @@ class RowGroupSerializer : public RowGroupWriter::Contents {
       closed_ = true;
       CheckRowsWritten();
 
-      for (size_t i = 0; i < column_writers_.size(); i++) {
-        if (column_writers_[i]) {
-          total_bytes_written_ += column_writers_[i]->Close();
+      // Avoid invalid state if ColumnWriter::Close() throws internally.
+      auto column_writers = std::move(column_writers_);
+      for (size_t i = 0; i < column_writers.size(); i++) {
+        if (column_writers[i]) {
+          total_bytes_written_ += column_writers[i]->Close();
           total_compressed_bytes_written_ +=
-              column_writers_[i]->total_compressed_bytes_written();
-          column_writers_[i].reset();
+              column_writers[i]->total_compressed_bytes_written();
         }
       }
 
-      column_writers_.clear();
-
       // Ensures all columns have been written
       metadata_->set_num_rows(num_rows_);
       metadata_->Finish(total_bytes_written_, row_group_ordinal_);
@@ -261,8 +260,10 @@ class RowGroupSerializer : public RowGroupWriter::Contents {
       }
     } else if (buffered_row_group_ &&
                column_writers_.size() > 0) {  // when buffered_row_group = true
+      DCHECK(column_writers_[0] != nullptr);
       int64_t current_col_rows = column_writers_[0]->rows_written();
       for (int i = 1; i < static_cast<int>(column_writers_.size()); i++) {
+        DCHECK(column_writers_[i] != nullptr);
         int64_t current_col_rows_i = column_writers_[i]->rows_written();
         if (current_col_rows != current_col_rows_i) {
           ThrowRowsMisMatchError(i, current_col_rows_i, current_col_rows);
@@ -380,7 +381,7 @@ class FileSerializer : public ParquetFileWriter::Contents {
   void AddKeyValueMetadata(
       const std::shared_ptr<const KeyValueMetadata>& key_value_metadata) override {
     if (key_value_metadata_ == nullptr) {
-      key_value_metadata_ = std::move(key_value_metadata);
+      key_value_metadata_ = key_value_metadata;
     } else if (key_value_metadata != nullptr) {
       key_value_metadata_ = key_value_metadata_->Merge(*key_value_metadata);
     }
@@ -388,7 +389,7 @@ class FileSerializer : public ParquetFileWriter::Contents {
 
   ~FileSerializer() override {
     try {
-      Close();
+      FileSerializer::Close();
     } catch (...) {
     }
   }

Reply via email to