This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.1 by this push:
     new c59189cd2ba [fix](build) Backport thirdparty updates for Arrow LZO (#65191)
c59189cd2ba is described below

commit c59189cd2baaf2ddec54b02ffeed0d9d342a8291
Author: Gabriel <[email protected]>
AuthorDate: Fri Jul 3 14:55:14 2026 +0800

    [fix](build) Backport thirdparty updates for Arrow LZO (#65191)
    
    ### What problem does this PR solve?
    
    Issue Number: None
    
    Related PR: #65046
    
    Problem Summary: Branch 4.1 shares the rebuilt Doris thirdparty
    artifacts with master. PR #65046 added Parquet LZO page decompression
    support to the Arrow/Parquet thirdparty patch, which introduces lzo
    symbols from libparquet.a. Branch 4.1 already builds lzo2 as a
    thirdparty package, but its BE CMake thirdparty list did not link lzo2,
    so builds using the shared updated libparquet.a can fail with unresolved
    lzo symbols.
    
    This backports the thirdparty-related changes from #65046: the Arrow LZO
    patch, lzo2 linkage, FlatBuffers version update and include cleanup, and
    Arrow include path setup during thirdparty build.
    
    ### Release note
    
    None
    
    ### Check List (For Author)
    
    - Test: Manual test
        - `git diff --cached --check`
        - `bash -n thirdparty/build-thirdparty.sh thirdparty/download-thirdparty.sh thirdparty/vars.sh`
    - Behavior changed: No
    - Does this need documentation: No
---
 be/cmake/thirdparty.cmake                        |  1 +
 thirdparty/build-thirdparty.sh                   |  5 +-
 thirdparty/download-thirdparty.sh                |  2 +
 thirdparty/patches/apache-arrow-17.0.0-lzo.patch | 84 ++++++++++++++++++++++++
 thirdparty/vars.sh                               |  8 +--
 5 files changed, 95 insertions(+), 5 deletions(-)

diff --git a/be/cmake/thirdparty.cmake b/be/cmake/thirdparty.cmake
index 227f81411f1..8aa4ae73020 100644
--- a/be/cmake/thirdparty.cmake
+++ b/be/cmake/thirdparty.cmake
@@ -66,6 +66,7 @@ add_thirdparty(gmock)
 add_thirdparty(snappy)
 add_thirdparty(curl)
 add_thirdparty(lz4)
+add_thirdparty(lzo2)
 add_thirdparty(thrift)
 add_thirdparty(thriftnb)
 add_thirdparty(crc32c)
diff --git a/thirdparty/build-thirdparty.sh b/thirdparty/build-thirdparty.sh
index 50f882d0b72..c8b4afdb6a1 100755
--- a/thirdparty/build-thirdparty.sh
+++ b/thirdparty/build-thirdparty.sh
@@ -995,6 +995,7 @@ build_flatbuffers() {
     "${BUILD_SYSTEM}" -j "${PARALLEL}"
 
     cp flatc ../../../installed/bin/flatc
+    rm -rf ../../../installed/include/flatbuffers
     cp -r ../include/flatbuffers ../../../installed/include/flatbuffers
     cp libflatbuffers.a ../../../installed/lib/libflatbuffers.a
 }
@@ -1084,7 +1085,9 @@ build_arrow() {
         ldflags="-L${TP_LIB_DIR}"
     fi
 
-    LDFLAGS="${ldflags}" \
+    CPPFLAGS="-I${TP_INCLUDE_DIR}" \
+        CXXFLAGS="-I${TP_INCLUDE_DIR}" \
+        LDFLAGS="${ldflags}" \
         "${CMAKE_CMD}" -DCMAKE_POLICY_VERSION_MINIMUM=3.5 \
         -G "${GENERATOR}" -DARROW_PARQUET=ON -DARROW_IPC=ON 
-DARROW_BUILD_SHARED=OFF \
         -DARROW_BUILD_STATIC=ON -DARROW_WITH_BROTLI=ON -DARROW_WITH_LZ4=ON 
-DARROW_USE_GLOG=ON \
diff --git a/thirdparty/download-thirdparty.sh 
b/thirdparty/download-thirdparty.sh
index a61f520ea35..feb94de4bdd 100755
--- a/thirdparty/download-thirdparty.sh
+++ b/thirdparty/download-thirdparty.sh
@@ -455,6 +455,8 @@ if [[ " ${TP_ARCHIVES[*]} " =~ " ARROW " ]]; then
             # std::string objects in RELRO, then crash while initializing them.
             patch -p1 
<"${TP_PATCH_DIR}/apache-arrow-17.0.0-status-inline-static-fix.patch"
 
+            # Add Parquet LZO page decompression support used by file scanner v2.
+            patch -p1 <"${TP_PATCH_DIR}/apache-arrow-17.0.0-lzo.patch"
             touch "${PATCHED_MARK}"
         fi
         cd -
diff --git a/thirdparty/patches/apache-arrow-17.0.0-lzo.patch 
b/thirdparty/patches/apache-arrow-17.0.0-lzo.patch
new file mode 100644
index 00000000000..a983818413a
--- /dev/null
+++ b/thirdparty/patches/apache-arrow-17.0.0-lzo.patch
@@ -0,0 +1,84 @@
+--- a/cpp/src/parquet/column_reader.cc
++++ b/cpp/src/parquet/column_reader.cc
+@@ -30,0 +31,2 @@
++
++#include <lzo/lzo1x.h>
+@@ -268,0 +269 @@
++        compression_codec_(codec),
+@@ -279 +282,7 @@
+-    decompressor_ = GetCodec(codec);
++    if (compression_codec_ == Compression::LZO) {
++      if (lzo_init() != LZO_E_OK) {
++        throw ParquetException("Failed to initialize LZO codec");
++      }
++    } else {
++      decompressor_ = GetCodec(codec);
++    }
+@@ -315,0 +325 @@
++  Compression::type compression_codec_;
+@@ -585 +595 @@
+-  if (decompressor_ == nullptr) {
++  if (decompressor_ == nullptr && compression_codec_ != Compression::LZO) {
+@@ -601,0 +612,61 @@
++  if (compression_codec_ == Compression::LZO) {
++    const uint8_t* input = page_buffer->data() + levels_byte_len;
++    const uint8_t* const input_end = page_buffer->data() + compressed_len;
++    uint8_t* output = decompression_buffer_->mutable_data() + levels_byte_len;
++    uint8_t* const output_end = decompression_buffer_->mutable_data() + 
uncompressed_len;
++
++    auto load_big_endian_u32 = [](const uint8_t* data) {
++      return (static_cast<uint32_t>(data[0]) << 24) |
++             (static_cast<uint32_t>(data[1]) << 16) |
++             (static_cast<uint32_t>(data[2]) << 8) | 
static_cast<uint32_t>(data[3]);
++    };
++
++    while (input < input_end) {
++      if (input_end - input < 4) {
++        throw ParquetException("LZO page decompression failed: truncated 
large block length");
++      }
++
++      uint32_t large_block_uncompressed_len = load_big_endian_u32(input);
++      input += 4;
++      if (static_cast<size_t>(output_end - output) < 
large_block_uncompressed_len) {
++        throw ParquetException("LZO page decompression failed: output buffer 
too small");
++      }
++
++      while (large_block_uncompressed_len > 0) {
++        if (input_end - input < 4) {
++          throw ParquetException("LZO page decompression failed: truncated 
small block length");
++        }
++
++        uint32_t small_block_compressed_len = load_big_endian_u32(input);
++        input += 4;
++        if (static_cast<size_t>(input_end - input) < 
small_block_compressed_len) {
++          throw ParquetException("LZO page decompression failed: truncated 
small block data");
++        }
++
++        auto small_block_uncompressed_len =
++            static_cast<lzo_uint>(large_block_uncompressed_len);
++        const int result =
++            lzo1x_decompress_safe(input, 
static_cast<lzo_uint>(small_block_compressed_len),
++                                  output, &small_block_uncompressed_len, 
nullptr);
++        if (result != LZO_E_OK) {
++          throw ParquetException("LZO page decompression failed, error: " +
++                                 std::to_string(result));
++        }
++        if (small_block_uncompressed_len > large_block_uncompressed_len) {
++          throw ParquetException("LZO page decompression failed: invalid 
small block size");
++        }
++
++        input += small_block_compressed_len;
++        output += small_block_uncompressed_len;
++        large_block_uncompressed_len -= small_block_uncompressed_len;
++      }
++    }
++    if (output != output_end) {
++      throw ParquetException("Page didn't decompress to expected size, 
expected: " +
++                             std::to_string(uncompressed_len - 
levels_byte_len) + ", but got:" +
++                             std::to_string(output - 
(decompression_buffer_->mutable_data() +
++                                                      levels_byte_len)));
++    }
++
++    return decompression_buffer_;
++  }
++
diff --git a/thirdparty/vars.sh b/thirdparty/vars.sh
index 155a4f6c898..77ed4e32e35 100644
--- a/thirdparty/vars.sh
+++ b/thirdparty/vars.sh
@@ -242,10 +242,10 @@ BROTLI_SOURCE="brotli-1.0.9"
 BROTLI_MD5SUM="c2274f0c7af8470ad514637c35bcee7d"
 
 # flatbuffers
-FLATBUFFERS_DOWNLOAD="https://github.com/google/flatbuffers/archive/v2.0.0.tar.gz";
-FLATBUFFERS_NAME=flatbuffers-2.0.0.tar.gz
-FLATBUFFERS_SOURCE=flatbuffers-2.0.0
-FLATBUFFERS_MD5SUM="a27992324c3cbf86dd888268a23d17bd"
+FLATBUFFERS_DOWNLOAD="https://github.com/google/flatbuffers/archive/v23.5.26.tar.gz";
+FLATBUFFERS_NAME=flatbuffers-23.5.26.tar.gz
+FLATBUFFERS_SOURCE=flatbuffers-23.5.26
+FLATBUFFERS_MD5SUM="2ef00eaaa86ab5e9ad5eafe09c2e7b60"
 
 # c-ares
 
CARES_DOWNLOAD="https://github.com/c-ares/c-ares/releases/download/cares-1_19_1/c-ares-1.19.1.tar.gz";


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to