This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new e1513de1812 branch-2.0 [fix](arrow) fix arrow parquet writer exception handling (#50191)
e1513de1812 is described below

commit e1513de181248b1dcef0fe8e977bf041a854938b
Author: Mingyu Chen (Rayner) <[email protected]>
AuthorDate: Sat Apr 19 00:10:54 2025 -0700

    branch-2.0 [fix](arrow) fix arrow parquet writer exception handling (#50191)
    
    see https://github.com/apache/arrow/pull/35520
---
 .github/workflows/build-thirdparty.yml             | 21 ++++++++++++++++---
 thirdparty/download-thirdparty.sh                  |  1 +
 .../apache-arrow-7.0.0-exception-handling.patch    | 24 ++++++++++++++++++++++
 3 files changed, 43 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/build-thirdparty.yml b/.github/workflows/build-thirdparty.yml
index 991b5089035..2185237973a 100644
--- a/.github/workflows/build-thirdparty.yml
+++ b/.github/workflows/build-thirdparty.yml
@@ -87,7 +87,7 @@ jobs:
             'autoconf'
             'libtool-bin'
             'pkg-config'
-            'cmake'
+            'cmake=3.22.1-1ubuntu1.22.04.2'
             'ninja-build'
             'ccache'
             'python-is-python3'
@@ -107,6 +107,7 @@ jobs:
           )
 
           sudo apt update
+          sudo apt-cache policy cmake
           sudo DEBIAN_FRONTEND=noninteractive apt install --yes "${packages[@]}"
 
           mkdir -p "${DEFAULT_DIR}"
@@ -153,7 +154,6 @@ jobs:
             'coreutils'
             'gnu-getopt'
             'python@3'
-            'cmake'
             'ninja'
             'ccache'
             'bison'
@@ -167,8 +167,16 @@ jobs:
             'llvm@16'
           )
 
+          # Install packages except cmake
           brew install "${packages[@]}" || true
 
+          # Install specific version of cmake
+          brew unlink cmake || true
+          wget https://github.com/Kitware/CMake/releases/download/v3.22.1/cmake-3.22.1-macos-universal.tar.gz
+          tar -xzf cmake-3.22.1-macos-universal.tar.gz
+          sudo cp -r cmake-3.22.1-macos-universal/CMake.app/Contents/* /usr/local/
+          cmake --version
+
       - name: Build
         run: |
           export MACOSX_DEPLOYMENT_TARGET=12.0
@@ -204,7 +212,6 @@ jobs:
             'coreutils'
             'gnu-getopt'
             'python@3'
-            'cmake'
             'ninja'
             'ccache'
             'bison'
@@ -218,8 +225,16 @@ jobs:
             'llvm@16'
           )
 
+          # Install packages except cmake
           brew install "${packages[@]}" || true
 
+          # Install specific version of cmake
+          brew unlink cmake || true
+          wget https://github.com/Kitware/CMake/releases/download/v3.22.1/cmake-3.22.1-macos-universal.tar.gz
+          tar -xzf cmake-3.22.1-macos-universal.tar.gz
+          sudo cp -r cmake-3.22.1-macos-universal/CMake.app/Contents/* /usr/local/
+          cmake --version
+
       - name: Build
         run: |
           export MACOSX_DEPLOYMENT_TARGET=12.0
diff --git a/thirdparty/download-thirdparty.sh b/thirdparty/download-thirdparty.sh
index b020ec92fb2..e44d56ed1d0 100755
--- a/thirdparty/download-thirdparty.sh
+++ b/thirdparty/download-thirdparty.sh
@@ -329,6 +329,7 @@ if [[ "${ARROW_SOURCE}" == "apache-arrow-7.0.0" ]]; then
     cd "${TP_SOURCE_DIR}/${ARROW_SOURCE}"
     if [[ ! -f "${PATCHED_MARK}" ]]; then
         patch -p1 <"${TP_PATCH_DIR}/apache-arrow-7.0.0.patch"
+        patch -p1 <"${TP_PATCH_DIR}/apache-arrow-7.0.0-exception-handling.patch"
         touch "${PATCHED_MARK}"
     fi
     cd -
diff --git a/thirdparty/patches/apache-arrow-7.0.0-exception-handling.patch b/thirdparty/patches/apache-arrow-7.0.0-exception-handling.patch
new file mode 100644
index 00000000000..4a301829a36
--- /dev/null
+++ b/thirdparty/patches/apache-arrow-7.0.0-exception-handling.patch
@@ -0,0 +1,24 @@
+diff --git a/cpp/src/parquet/file_writer.cc b/cpp/src/parquet/file_writer.cc
+index deac9586e..572e8bde7 100644
+--- a/cpp/src/parquet/file_writer.cc
++++ b/cpp/src/parquet/file_writer.cc
+@@ -181,15 +181,13 @@ class RowGroupSerializer : public RowGroupWriter::Contents {
+       closed_ = true;
+       CheckRowsWritten();
+ 
+-      for (size_t i = 0; i < column_writers_.size(); i++) {
+-        if (column_writers_[i]) {
+-          total_bytes_written_ += column_writers_[i]->Close();
+-          column_writers_[i].reset();
++      auto column_writers = std::move(column_writers_);
++      for (size_t i = 0; i < column_writers.size(); i++) {
++        if (column_writers[i]) {
++          total_bytes_written_ += column_writers[i]->Close();
+         }
+       }
+ 
+-      column_writers_.clear();
+-
+       // Ensures all columns have been written
+       metadata_->set_num_rows(num_rows_);
+       metadata_->Finish(total_bytes_written_, row_group_ordinal_);


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to