This is an automated email from the ASF dual-hosted git repository.

leaves12138 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/paimon-cpp.git

commit 0a59d4496d25f9d8b90300c1590ad7130faaee79
Author: yonghao.fyh <[email protected]>
AuthorDate: Fri May 22 17:50:48 2026 +0800

    chore: add thirdparty toolchain and thirdparty diff
---
 cmake_modules/ThirdpartyToolchain.cmake | 1787 +++++++++++++++++++++++++++++++
 cmake_modules/arrow.diff                |  213 ++++
 cmake_modules/jieba.diff                |   16 +
 cmake_modules/orc.diff                  |  437 ++++++++
 4 files changed, 2453 insertions(+)

diff --git a/cmake_modules/ThirdpartyToolchain.cmake b/cmake_modules/ThirdpartyToolchain.cmake
new file mode 100644
index 0000000..2be9097
--- /dev/null
+++ b/cmake_modules/ThirdpartyToolchain.cmake
@@ -0,0 +1,1787 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Key/value pairs named after ExternalProject's LOG_* options; each one is
+# switched on (1). Where the list is consumed is outside this section.
+set(THIRDPARTY_LOG_OPTIONS
+    LOG_CONFIGURE
+    1
+    LOG_BUILD
+    1
+    LOG_INSTALL
+    1
+    LOG_DOWNLOAD
+    1)
+# Base configure command: this build's CMake executable and generator, plus
+# the generator toolset when one is selected.
+set(THIRDPARTY_CONFIGURE_COMMAND "${CMAKE_COMMAND}" -G "${CMAKE_GENERATOR}")
+if(CMAKE_GENERATOR_TOOLSET)
+    list(APPEND THIRDPARTY_CONFIGURE_COMMAND -T "${CMAKE_GENERATOR_TOOLSET}")
+endif()
+
+# The expansion is quoted on purpose: CMAKE_BUILD_TYPE is empty under
+# multi-config generators (or when it was never set), and an unquoted empty
+# expansion would drop the input argument and make string() fail.
+string(TOUPPER "${CMAKE_BUILD_TYPE}" UPPERCASE_BUILD_TYPE)
+string(TOLOWER "${CMAKE_BUILD_TYPE}" LOWERCASE_BUILD_TYPE)
+
+# Compiler selection that is forwarded to external projects through
+# EP_COMMON_CMAKE_ARGS.
+set(EP_COMMON_TOOLCHAIN "-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}"
+                        "-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}")
+
+# set_urls(<variable> <url>...)
+#
+# Assigns every argument after the first to the variable whose name is given
+# by URLS. Being a macro, the assignment lands in the caller's scope.
+macro(set_urls URLS)
+    set("${URLS}" ${ARGN})
+endmacro()
+
+set(THIRDPARTY_DIR "${CMAKE_SOURCE_DIR}/third_party")
+# Read toolchain versions from third_party/versions.txt.
+# Every accepted line has the form NAME=VALUE and defines a CMake variable
+# NAME with that value in the current scope.
+file(STRINGS "${THIRDPARTY_DIR}/versions.txt" TOOLCHAIN_VERSIONS_TXT)
+foreach(_VERSION_ENTRY ${TOOLCHAIN_VERSIONS_TXT})
+    # Exclude comments and every line that is not a *_VERSION=, *_CHECKSUM=
+    # or *_PKG_NAME= assignment.
+    if(NOT
+       ((_VERSION_ENTRY MATCHES "^[^#][A-Za-z0-9-_]+_VERSION=")
+        OR (_VERSION_ENTRY MATCHES "^[^#][A-Za-z0-9-_]+_CHECKSUM=")
+        OR (_VERSION_ENTRY MATCHES "^[^#][A-Za-z0-9-_]+_PKG_NAME=")))
+        continue()
+    endif()
+
+    # Split at the first '='. The entry is quoted so that a ';' inside the
+    # line cannot break it into several arguments.
+    string(REGEX MATCH "^[^=]*" _VARIABLE_NAME "${_VERSION_ENTRY}")
+    string(REPLACE "${_VARIABLE_NAME}=" "" _VARIABLE_VALUE "${_VERSION_ENTRY}")
+
+    # Skip blank or malformed lines
+    if(_VARIABLE_VALUE STREQUAL "")
+        continue()
+    endif()
+
+    set(${_VARIABLE_NAME} "${_VARIABLE_VALUE}")
+
+    if(_VARIABLE_NAME MATCHES "_PKG_NAME$")
+        # Expand version to package name: variable references inside the
+        # value are substituted with the variables defined so far.
+        string(CONFIGURE "${${_VARIABLE_NAME}}" _EXPANDED ESCAPE_QUOTES)
+        set(${_VARIABLE_NAME} "${_EXPANDED}")
+    endif()
+
+    # For debugging
+    message(STATUS "${_VARIABLE_NAME}: ${${_VARIABLE_NAME}}")
+endforeach()
+
+# Source location of every bundled dependency. Each <NAME>_SOURCE_URL below
+# is chosen with the same precedence:
+#   1. the environment variable PAIMON_<NAME>_URL, when defined;
+#   2. the archive ${THIRDPARTY_DIR}/${PAIMON_<NAME>_PKG_NAME}, when it exists;
+#   3. the upstream URL, prefixed with THIRDPARTY_MIRROR_URL.
+# THIRDPARTY_MIRROR_URL comes from the environment variable
+# PAIMON_THIRDPARTY_MIRROR_URL and is empty when that variable is not defined.
+if(DEFINED ENV{PAIMON_THIRDPARTY_MIRROR_URL})
+    set(THIRDPARTY_MIRROR_URL "$ENV{PAIMON_THIRDPARTY_MIRROR_URL}")
+else()
+    set(THIRDPARTY_MIRROR_URL "")
+endif()
+
+if(DEFINED ENV{PAIMON_ARROW_URL})
+    set(ARROW_SOURCE_URL "$ENV{PAIMON_ARROW_URL}")
+else()
+    if(EXISTS "${THIRDPARTY_DIR}/${PAIMON_ARROW_PKG_NAME}")
+        set_urls(ARROW_SOURCE_URL "${THIRDPARTY_DIR}/${PAIMON_ARROW_PKG_NAME}")
+    else()
+        set_urls(ARROW_SOURCE_URL
+                 "${THIRDPARTY_MIRROR_URL}https://github.com/apache/arrow/releases/download/apache-arrow-${PAIMON_ARROW_BUILD_VERSION}/apache-arrow-${PAIMON_ARROW_BUILD_VERSION}.tar.gz"
+        )
+    endif()
+endif()
+
+if(DEFINED ENV{PAIMON_RAPIDJSON_URL})
+    set(RAPIDJSON_SOURCE_URL "$ENV{PAIMON_RAPIDJSON_URL}")
+else()
+    if(EXISTS "${THIRDPARTY_DIR}/${PAIMON_RAPIDJSON_PKG_NAME}")
+        set_urls(RAPIDJSON_SOURCE_URL "${THIRDPARTY_DIR}/${PAIMON_RAPIDJSON_PKG_NAME}")
+    else()
+        set_urls(RAPIDJSON_SOURCE_URL
+                 "${THIRDPARTY_MIRROR_URL}https://github.com/miloyip/rapidjson/archive/${PAIMON_RAPIDJSON_BUILD_VERSION}.tar.gz"
+        )
+    endif()
+endif()
+
+if(DEFINED ENV{PAIMON_FMT_URL})
+    set(FMT_SOURCE_URL "$ENV{PAIMON_FMT_URL}")
+else()
+    if(EXISTS "${THIRDPARTY_DIR}/${PAIMON_FMT_PKG_NAME}")
+        set_urls(FMT_SOURCE_URL "${THIRDPARTY_DIR}/${PAIMON_FMT_PKG_NAME}")
+    else()
+        set_urls(FMT_SOURCE_URL
+                 "${THIRDPARTY_MIRROR_URL}https://github.com/fmtlib/fmt/archive/refs/tags/${PAIMON_FMT_BUILD_VERSION}.tar.gz"
+        )
+    endif()
+endif()
+
+if(DEFINED ENV{PAIMON_LUCENE_URL})
+    set(LUCENE_SOURCE_URL "$ENV{PAIMON_LUCENE_URL}")
+else()
+    if(EXISTS "${THIRDPARTY_DIR}/${PAIMON_LUCENE_PKG_NAME}")
+        set_urls(LUCENE_SOURCE_URL "${THIRDPARTY_DIR}/${PAIMON_LUCENE_PKG_NAME}")
+    else()
+        set_urls(LUCENE_SOURCE_URL
+                 "${THIRDPARTY_MIRROR_URL}https://github.com/luceneplusplus/LucenePlusPlus/archive/refs/tags/rel_${PAIMON_LUCENE_BUILD_VERSION}.tar.gz"
+        )
+    endif()
+endif()
+
+if(DEFINED ENV{PAIMON_LIMONP_URL})
+    set(LIMONP_SOURCE_URL "$ENV{PAIMON_LIMONP_URL}")
+else()
+    if(EXISTS "${THIRDPARTY_DIR}/${PAIMON_LIMONP_PKG_NAME}")
+        set_urls(LIMONP_SOURCE_URL "${THIRDPARTY_DIR}/${PAIMON_LIMONP_PKG_NAME}")
+    else()
+        set_urls(LIMONP_SOURCE_URL
+                 "${THIRDPARTY_MIRROR_URL}https://github.com/yanyiwu/limonp/archive/refs/tags/v${PAIMON_LIMONP_BUILD_VERSION}.tar.gz"
+        )
+    endif()
+endif()
+
+if(DEFINED ENV{PAIMON_JIEBA_URL})
+    set(JIEBA_SOURCE_URL "$ENV{PAIMON_JIEBA_URL}")
+else()
+    if(EXISTS "${THIRDPARTY_DIR}/${PAIMON_JIEBA_PKG_NAME}")
+        set_urls(JIEBA_SOURCE_URL "${THIRDPARTY_DIR}/${PAIMON_JIEBA_PKG_NAME}")
+    else()
+        set_urls(JIEBA_SOURCE_URL
+                 "${THIRDPARTY_MIRROR_URL}https://github.com/yanyiwu/cppjieba/archive/refs/tags/${PAIMON_JIEBA_BUILD_VERSION}.tar.gz"
+        )
+    endif()
+endif()
+
+if(DEFINED ENV{PAIMON_RE2_URL})
+    set(RE2_SOURCE_URL "$ENV{PAIMON_RE2_URL}")
+else()
+    if(EXISTS "${THIRDPARTY_DIR}/${PAIMON_RE2_PKG_NAME}")
+        set_urls(RE2_SOURCE_URL "${THIRDPARTY_DIR}/${PAIMON_RE2_PKG_NAME}")
+    else()
+        set_urls(RE2_SOURCE_URL
+                 "${THIRDPARTY_MIRROR_URL}https://github.com/google/re2/archive/${PAIMON_RE2_BUILD_VERSION}.tar.gz"
+        )
+    endif()
+endif()
+
+if(DEFINED ENV{PAIMON_GLOG_URL})
+    set(GLOG_SOURCE_URL "$ENV{PAIMON_GLOG_URL}")
+else()
+    if(EXISTS "${THIRDPARTY_DIR}/${PAIMON_GLOG_PKG_NAME}")
+        set_urls(GLOG_SOURCE_URL "${THIRDPARTY_DIR}/${PAIMON_GLOG_PKG_NAME}")
+    else()
+        set_urls(GLOG_SOURCE_URL
+                 "${THIRDPARTY_MIRROR_URL}https://github.com/google/glog/archive/${PAIMON_GLOG_BUILD_VERSION}.tar.gz"
+        )
+    endif()
+endif()
+
+if(DEFINED ENV{PAIMON_ZLIB_URL})
+    set(ZLIB_SOURCE_URL "$ENV{PAIMON_ZLIB_URL}")
+else()
+    if(EXISTS "${THIRDPARTY_DIR}/${PAIMON_ZLIB_PKG_NAME}")
+        set_urls(ZLIB_SOURCE_URL "${THIRDPARTY_DIR}/${PAIMON_ZLIB_PKG_NAME}")
+    else()
+        set_urls(ZLIB_SOURCE_URL
+                 "${THIRDPARTY_MIRROR_URL}https://github.com/madler/zlib/releases/download/v${PAIMON_ZLIB_BUILD_VERSION}/zlib-${PAIMON_ZLIB_BUILD_VERSION}.tar.gz"
+        )
+    endif()
+endif()
+
+if(DEFINED ENV{PAIMON_ZSTD_URL})
+    set(ZSTD_SOURCE_URL "$ENV{PAIMON_ZSTD_URL}")
+else()
+    if(EXISTS "${THIRDPARTY_DIR}/${PAIMON_ZSTD_PKG_NAME}")
+        set_urls(ZSTD_SOURCE_URL "${THIRDPARTY_DIR}/${PAIMON_ZSTD_PKG_NAME}")
+    else()
+        set_urls(ZSTD_SOURCE_URL
+                 "${THIRDPARTY_MIRROR_URL}https://github.com/facebook/zstd/releases/download/v${PAIMON_ZSTD_BUILD_VERSION}/zstd-${PAIMON_ZSTD_BUILD_VERSION}.tar.gz"
+        )
+    endif()
+endif()
+
+if(DEFINED ENV{PAIMON_LZ4_URL})
+    set(LZ4_SOURCE_URL "$ENV{PAIMON_LZ4_URL}")
+else()
+    if(EXISTS "${THIRDPARTY_DIR}/${PAIMON_LZ4_PKG_NAME}")
+        set_urls(LZ4_SOURCE_URL "${THIRDPARTY_DIR}/${PAIMON_LZ4_PKG_NAME}")
+    else()
+        set_urls(LZ4_SOURCE_URL
+                 "${THIRDPARTY_MIRROR_URL}https://github.com/lz4/lz4/archive/${PAIMON_LZ4_BUILD_VERSION}.tar.gz"
+        )
+    endif()
+endif()
+
+if(DEFINED ENV{PAIMON_SNAPPY_URL})
+    set(SNAPPY_SOURCE_URL "$ENV{PAIMON_SNAPPY_URL}")
+else()
+    if(EXISTS "${THIRDPARTY_DIR}/${PAIMON_SNAPPY_PKG_NAME}")
+        set_urls(SNAPPY_SOURCE_URL "${THIRDPARTY_DIR}/${PAIMON_SNAPPY_PKG_NAME}")
+    else()
+        set_urls(SNAPPY_SOURCE_URL
+                 "${THIRDPARTY_MIRROR_URL}https://github.com/google/snappy/archive/${PAIMON_SNAPPY_BUILD_VERSION}.tar.gz"
+        )
+    endif()
+endif()
+
+if(DEFINED ENV{PAIMON_PROTOBUF_URL})
+    set(PROTOBUF_SOURCE_URL "$ENV{PAIMON_PROTOBUF_URL}")
+else()
+    if(EXISTS "${THIRDPARTY_DIR}/${PAIMON_PROTOBUF_PKG_NAME}")
+        set_urls(PROTOBUF_SOURCE_URL "${THIRDPARTY_DIR}/${PAIMON_PROTOBUF_PKG_NAME}")
+    else()
+        set_urls(PROTOBUF_SOURCE_URL
+                 "${THIRDPARTY_MIRROR_URL}https://github.com/protocolbuffers/protobuf/releases/download/v${PAIMON_PROTOBUF_BUILD_VERSION}/protobuf-all-${PAIMON_PROTOBUF_BUILD_VERSION}.tar.gz"
+        )
+    endif()
+endif()
+
+if(DEFINED ENV{PAIMON_GTEST_URL})
+    set(GTEST_SOURCE_URL "$ENV{PAIMON_GTEST_URL}")
+else()
+    if(EXISTS "${THIRDPARTY_DIR}/${PAIMON_GTEST_PKG_NAME}")
+        set_urls(GTEST_SOURCE_URL "${THIRDPARTY_DIR}/${PAIMON_GTEST_PKG_NAME}")
+    else()
+        set_urls(GTEST_SOURCE_URL
+                 "${THIRDPARTY_MIRROR_URL}https://github.com/google/googletest/archive/release-${PAIMON_GTEST_BUILD_VERSION}.tar.gz"
+        )
+    endif()
+endif()
+
+if(DEFINED ENV{PAIMON_TBB_URL})
+    set(TBB_SOURCE_URL "$ENV{PAIMON_TBB_URL}")
+else()
+    if(EXISTS "${THIRDPARTY_DIR}/${PAIMON_TBB_PKG_NAME}")
+        set_urls(TBB_SOURCE_URL "${THIRDPARTY_DIR}/${PAIMON_TBB_PKG_NAME}")
+    else()
+        set_urls(TBB_SOURCE_URL
+                 "${THIRDPARTY_MIRROR_URL}https://github.com/uxlfoundation/oneTBB/archive/refs/tags/${PAIMON_TBB_BUILD_VERSION}.tar.gz"
+        )
+    endif()
+endif()
+
+if(DEFINED ENV{PAIMON_ORC_URL})
+    set(ORC_SOURCE_URL "$ENV{PAIMON_ORC_URL}")
+else()
+    if(EXISTS "${THIRDPARTY_DIR}/${PAIMON_ORC_PKG_NAME}")
+        set_urls(ORC_SOURCE_URL "${THIRDPARTY_DIR}/${PAIMON_ORC_PKG_NAME}")
+    else()
+        set_urls(ORC_SOURCE_URL
+                 "${THIRDPARTY_MIRROR_URL}https://github.com/apache/orc/archive/refs/tags/${PAIMON_ORC_BUILD_VERSION}.tar.gz"
+        )
+    endif()
+endif()
+
+if(DEFINED ENV{PAIMON_AVRO_URL})
+    set(AVRO_SOURCE_URL "$ENV{PAIMON_AVRO_URL}")
+else()
+    if(EXISTS "${THIRDPARTY_DIR}/${PAIMON_AVRO_PKG_NAME}")
+        set_urls(AVRO_SOURCE_URL "${THIRDPARTY_DIR}/${PAIMON_AVRO_PKG_NAME}")
+    else()
+        set_urls(AVRO_SOURCE_URL
+                 "${THIRDPARTY_MIRROR_URL}https://github.com/apache/avro/archive/${PAIMON_AVRO_BUILD_VERSION}.tar.gz"
+        )
+    endif()
+endif()
+
+# JindoSDK C library: pick the dynamic library file name, the expected SHA256
+# checksum and the download URL for the current platform and processor.
+# The per-platform environment variable PAIMON_JINDOSDK_C_<OS>_<ARCH>_URL
+# overrides the default URL; THIRDPARTY_MIRROR_URL is not applied here.
+# NOTE(review): for a processor that matches neither pattern,
+# JINDOSDK_C_BUILD_SHA256_CHECKSUM and JINDOSDK_C_SOURCE_URL stay unset here.
+if(APPLE)
+    set(JINDOSDK_C_DYNAMIC_LIB_NAME "jindosdk_c.${PAIMON_JINDOSDK_C_BUILD_VERSION}")
+    set(JINDOSDK_C_DYNAMIC_LIB_FILE "lib${JINDOSDK_C_DYNAMIC_LIB_NAME}.dylib")
+    if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64|AMD64")
+        set(JINDOSDK_C_BUILD_SHA256_CHECKSUM
+            "${PAIMON_JINDOSDK_C_MACOS_X86_64_BUILD_SHA256_CHECKSUM}")
+        if(DEFINED ENV{PAIMON_JINDOSDK_C_MACOS_X86_64_URL})
+            set(JINDOSDK_C_SOURCE_URL "$ENV{PAIMON_JINDOSDK_C_MACOS_X86_64_URL}")
+        else()
+            set_urls(JINDOSDK_C_SOURCE_URL
+                     "https://jindodata-binary.oss-cn-shanghai.aliyuncs.com/release/${PAIMON_JINDOSDK_C_BUILD_VERSION}/jindosdk-${PAIMON_JINDOSDK_C_BUILD_VERSION}-macos-11_0-x86_64.tar.gz"
+            )
+        endif()
+    elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|arm64|ARM64")
+        set(JINDOSDK_C_BUILD_SHA256_CHECKSUM
+            "${PAIMON_JINDOSDK_C_MACOS_AARCH64_BUILD_SHA256_CHECKSUM}")
+        if(DEFINED ENV{PAIMON_JINDOSDK_C_MACOS_AARCH64_URL})
+            set(JINDOSDK_C_SOURCE_URL "$ENV{PAIMON_JINDOSDK_C_MACOS_AARCH64_URL}")
+        else()
+            set_urls(JINDOSDK_C_SOURCE_URL
+                     "https://jindodata-binary.oss-cn-shanghai.aliyuncs.com/release/${PAIMON_JINDOSDK_C_BUILD_VERSION}/jindosdk-${PAIMON_JINDOSDK_C_BUILD_VERSION}-macos-11_0-aarch64.tar.gz"
+            )
+        endif()
+    endif()
+else()
+    set(JINDOSDK_C_DYNAMIC_LIB_NAME "jindosdk_c")
+    set(JINDOSDK_C_DYNAMIC_LIB_FILE "lib${JINDOSDK_C_DYNAMIC_LIB_NAME}.so")
+    if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64|AMD64")
+        set(JINDOSDK_C_BUILD_SHA256_CHECKSUM
+            "${PAIMON_JINDOSDK_C_LINUX_X86_64_BUILD_SHA256_CHECKSUM}")
+        if(DEFINED ENV{PAIMON_JINDOSDK_C_LINUX_X86_64_URL})
+            set(JINDOSDK_C_SOURCE_URL "$ENV{PAIMON_JINDOSDK_C_LINUX_X86_64_URL}")
+        else()
+            set_urls(JINDOSDK_C_SOURCE_URL
+                     "https://jindodata-binary.oss-cn-shanghai.aliyuncs.com/release/${PAIMON_JINDOSDK_C_BUILD_VERSION}/jindosdk-${PAIMON_JINDOSDK_C_BUILD_VERSION}-linux.tar.gz"
+            )
+        endif()
+    elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|arm64|ARM64")
+        set(JINDOSDK_C_BUILD_SHA256_CHECKSUM
+            "${PAIMON_JINDOSDK_C_LINUX_AARCH64_BUILD_SHA256_CHECKSUM}")
+        if(DEFINED ENV{PAIMON_JINDOSDK_C_LINUX_AARCH64_URL})
+            set(JINDOSDK_C_SOURCE_URL "$ENV{PAIMON_JINDOSDK_C_LINUX_AARCH64_URL}")
+        else()
+            set_urls(JINDOSDK_C_SOURCE_URL
+                     "https://jindodata-binary.oss-cn-shanghai.aliyuncs.com/release/${PAIMON_JINDOSDK_C_BUILD_VERSION}/jindosdk-${PAIMON_JINDOSDK_C_BUILD_VERSION}-linux-el7-aarch64.tar.gz"
+            )
+        endif()
+    endif()
+endif()
+
+# Compiler flags for external projects start from this build's own flags.
+set(EP_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+set(EP_C_FLAGS "${CMAKE_C_FLAGS}")
+# The inputs below are quoted: with an empty flag string an unquoted expansion
+# vanishes, leaving string(REPLACE) without its mandatory input argument.
+string(REPLACE "-Wglobal-constructors" "" EP_CXX_FLAGS "${EP_CXX_FLAGS}")
+string(REPLACE "-Wglobal-constructors" "" EP_C_FLAGS "${EP_C_FLAGS}")
+# Remove coverage flags from third-party dependencies to avoid gcov dependency
+string(REPLACE "--coverage" "" EP_CXX_FLAGS "${EP_CXX_FLAGS}")
+string(REPLACE "--coverage" "" EP_C_FLAGS "${EP_C_FLAGS}")
+string(REPLACE "-DCOVERAGE_BUILD" "" EP_CXX_FLAGS "${EP_CXX_FLAGS}")
+string(REPLACE "-DCOVERAGE_BUILD" "" EP_C_FLAGS "${EP_C_FLAGS}")
+if(NOT MSVC_TOOLCHAIN)
+    # Set -fPIC on all external projects
+    string(APPEND EP_CXX_FLAGS
+           " -fPIC -Wno-error -Wno-sign-compare -Wno-ignored-attributes")
+    string(APPEND EP_C_FLAGS " -fPIC")
+endif()
+
+# Build external C++ code with the libstdc++ ABI selected by
+# PAIMON_USE_CXX11_ABI.
+if(PAIMON_USE_CXX11_ABI)
+    string(APPEND EP_CXX_FLAGS " -D_GLIBCXX_USE_CXX11_ABI=1")
+else()
+    string(APPEND EP_CXX_FLAGS " -D_GLIBCXX_USE_CXX11_ABI=0")
+endif()
+
+# External projects are still able to override the following declarations.
+# cmake command line will favor the last defined variable when a duplicate is
+# encountered. This requires that `EP_COMMON_CMAKE_ARGS` is always the first
+# argument.
+set(EP_COMMON_CMAKE_ARGS
+    ${EP_COMMON_TOOLCHAIN}
+    -DBUILD_SHARED_LIBS=OFF
+    -DBUILD_STATIC_LIBS=ON
+    -DBUILD_TESTING=OFF
+    -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
+    -DCMAKE_CXX_FLAGS=${EP_CXX_FLAGS}
+    -DCMAKE_C_FLAGS=${EP_C_FLAGS}
+    -DCMAKE_INSTALL_LIBDIR=lib)
+
+# From CMake 3.30 on, also pass CMAKE_POLICY_VERSION_MINIMUM=3.5 to the
+# external projects.
+if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.30")
+    list(APPEND EP_COMMON_CMAKE_ARGS -DCMAKE_POLICY_VERSION_MINIMUM=3.5)
+endif()
+
+# The only values accepted for a dependency source option.
+set(PAIMON_DEPENDENCY_SOURCE_VALUES AUTO BUNDLED SYSTEM)
+
+# paimon_validate_dependency_source(<value> <option-name>)
+#
+# Stops configuration with a fatal error unless SOURCE_VALUE, compared
+# case-insensitively, is one of PAIMON_DEPENDENCY_SOURCE_VALUES.
+# OPTION_NAME is only used to name the offending option in the error message.
+function(paimon_validate_dependency_source SOURCE_VALUE OPTION_NAME)
+    string(TOUPPER "${SOURCE_VALUE}" _source)
+    list(FIND PAIMON_DEPENDENCY_SOURCE_VALUES "${_source}" _source_index)
+    if(_source_index EQUAL -1)
+        message(FATAL_ERROR "${OPTION_NAME} got invalid value '${SOURCE_VALUE}'. "
+                            "Allowed values: AUTO, BUNDLED, SYSTEM.")
+    endif()
+endfunction()
+
+# paimon_get_dependency_source(<dependency> <out-var>)
+#
+# Determines the requested source (AUTO, BUNDLED or SYSTEM) of a dependency
+# and stores it, upper-cased, in OUT_VAR in the caller's scope.
+# Lookup order, first non-empty value wins:
+#   1. the variable <dependency>_SOURCE;
+#   2. the global property PAIMON_<dependency>_DERIVED_SOURCE;
+#   3. the variable PAIMON_DEPENDENCY_SOURCE.
+# The chosen value is validated, so an invalid value is a fatal error that
+# names the option it came from.
+function(paimon_get_dependency_source DEPENDENCY_NAME OUT_VAR)
+    set(_source_option_name "${DEPENDENCY_NAME}_SOURCE")
+    set(_source "${${DEPENDENCY_NAME}_SOURCE}")
+    if("${_source}" STREQUAL "")
+        get_property(_source GLOBAL PROPERTY "PAIMON_${DEPENDENCY_NAME}_DERIVED_SOURCE")
+        if("${_source}" STREQUAL "")
+            set(_source "${PAIMON_DEPENDENCY_SOURCE}")
+            set(_source_option_name "PAIMON_DEPENDENCY_SOURCE")
+        else()
+            set(_source_option_name "derived ${DEPENDENCY_NAME}_SOURCE")
+        endif()
+    endif()
+    string(TOUPPER "${_source}" _source)
+    paimon_validate_dependency_source("${_source}" "${_source_option_name}")
+    set(${OUT_VAR}
+        "${_source}"
+        PARENT_SCOPE)
+endfunction()
+
+# paimon_set_dependency_source_default(<dependency> <value> <reason>)
+#
+# Records a derived default source for a dependency, but only when the
+# variable <dependency>_SOURCE is empty, so an explicit choice always wins.
+# The validated, upper-cased value is stored in the global property
+# PAIMON_<dependency>_DERIVED_SOURCE; REASON is only printed in the status
+# message.
+function(paimon_set_dependency_source_default DEPENDENCY_NAME SOURCE_VALUE REASON)
+    if("${${DEPENDENCY_NAME}_SOURCE}" STREQUAL "")
+        string(TOUPPER "${SOURCE_VALUE}" _source)
+        paimon_validate_dependency_source("${_source}"
+                                          "derived ${DEPENDENCY_NAME}_SOURCE")
+        set_property(GLOBAL PROPERTY "PAIMON_${DEPENDENCY_NAME}_DERIVED_SOURCE"
+                                     "${_source}")
+        message(STATUS "Defaulting ${DEPENDENCY_NAME}_SOURCE to ${_source}: ${REASON}")
+    endif()
+endfunction()
+
+# paimon_apply_dependency_source_defaults()
+#
+# Derives default sources for dependencies that should come from the same
+# place as the library that uses them:
+#   - zstd, Snappy, LZ4, ZLIB and RE2 follow Arrow;
+#   - Protobuf follows ORC (only when PAIMON_ENABLE_ORC is on).
+# For an explicit SYSTEM/BUNDLED source the value is copied. For AUTO the
+# matching <Name>Alt find module is probed quietly and the default becomes
+# SYSTEM when it is found, BUNDLED otherwise. Defaults never override an
+# explicitly set <dependency>_SOURCE.
+function(paimon_apply_dependency_source_defaults)
+    # Dependencies whose source follows the one used for Arrow.
+    set(_arrow_transitive_dependencies zstd Snappy LZ4 ZLIB RE2)
+
+    paimon_get_dependency_source(Arrow _arrow_source)
+    if(_arrow_source STREQUAL "SYSTEM" OR _arrow_source STREQUAL "BUNDLED")
+        foreach(_dependency IN LISTS _arrow_transitive_dependencies)
+            paimon_set_dependency_source_default(
+                ${_dependency} ${_arrow_source}
+                "follow Arrow_SOURCE to avoid mixed transitive dependencies")
+        endforeach()
+    elseif(_arrow_source STREQUAL "AUTO")
+        paimon_configure_dependency_root(Arrow "${_arrow_source}" _arrow_resolved_source)
+        find_package(ArrowAlt QUIET MODULE)
+        if(ArrowAlt_FOUND)
+            set(_arrow_dependency_default SYSTEM)
+            set(_arrow_dependency_reason
+                "system Arrow found during AUTO dependency precheck")
+        else()
+            set(_arrow_dependency_default BUNDLED)
+            set(_arrow_dependency_reason
+                "system Arrow not found during AUTO dependency precheck")
+        endif()
+        foreach(_dependency IN LISTS _arrow_transitive_dependencies)
+            paimon_set_dependency_source_default(
+                ${_dependency} ${_arrow_dependency_default} "${_arrow_dependency_reason}")
+        endforeach()
+    endif()
+
+    if(PAIMON_ENABLE_ORC)
+        paimon_get_dependency_source(ORC _orc_source)
+        if(_orc_source STREQUAL "SYSTEM" OR _orc_source STREQUAL "BUNDLED")
+            paimon_set_dependency_source_default(
+                Protobuf ${_orc_source}
+                "follow ORC_SOURCE to avoid mixed transitive dependencies")
+        elseif(_orc_source STREQUAL "AUTO")
+            paimon_configure_dependency_root(ORC "${_orc_source}" _orc_resolved_source)
+            find_package(ORCAlt QUIET MODULE)
+            if(ORCAlt_FOUND)
+                paimon_set_dependency_source_default(
+                    Protobuf SYSTEM "system ORC found during AUTO dependency precheck")
+            else()
+                paimon_set_dependency_source_default(
+                    Protobuf BUNDLED
+                    "system ORC not found during AUTO dependency precheck")
+            endif()
+        endif()
+    endif()
+endfunction()
+
+# paimon_configure_dependency_root(<dependency> <source> <out-var>)
+#
+# Currently a pass-through: stores SOURCE_VALUE unchanged in OUT_SOURCE in the
+# caller's scope. DEPENDENCY_NAME is accepted but not used.
+function(paimon_configure_dependency_root DEPENDENCY_NAME SOURCE_VALUE OUT_SOURCE)
+    set(${OUT_SOURCE}
+        "${SOURCE_VALUE}"
+        PARENT_SCOPE)
+endfunction()
+
+# paimon_get_dependency_root(<dependency> <out-var>)
+#
+# Reports where a dependency is searched for. OUT_VAR receives, in order of
+# preference: the non-empty variable <dependency>_ROOT, else the non-empty
+# PAIMON_PACKAGE_PREFIX, else the literal text "<default search paths>".
+function(paimon_get_dependency_root DEPENDENCY_NAME OUT_VAR)
+    # Start from the weakest choice and let stronger ones replace it.
+    set(_resolved_root "<default search paths>")
+    if(NOT "${PAIMON_PACKAGE_PREFIX}" STREQUAL "")
+        set(_resolved_root "${PAIMON_PACKAGE_PREFIX}")
+    endif()
+
+    set(_explicit_root_name "${DEPENDENCY_NAME}_ROOT")
+    if(DEFINED ${_explicit_root_name} AND NOT "${${_explicit_root_name}}" STREQUAL "")
+        set(_resolved_root "${${_explicit_root_name}}")
+    endif()
+
+    set(${OUT_VAR} "${_resolved_root}" PARENT_SCOPE)
+endfunction()
+
+# paimon_get_dependency_compat_target(<dependency> <out-var>)
+#
+# Maps a dependency name to the target name recorded for it and stores the
+# result in OUT_VAR in the caller's scope. Names without an entry in the table
+# map to themselves. The comparison is case-sensitive.
+function(paimon_get_dependency_compat_target DEPENDENCY_NAME OUT_VAR)
+    # Parallel lists: entry i of _known_names maps to entry i of _known_targets.
+    set(_known_names
+        Arrow
+        ORC
+        Protobuf
+        GTest
+        RE2
+        Snappy
+        LZ4
+        ZLIB
+        TBB
+        Avro)
+    set(_known_targets
+        arrow
+        orc::orc
+        libprotobuf
+        GTest::gtest
+        re2::re2
+        snappy
+        lz4
+        zlib
+        tbb
+        avro)
+
+    # Unknown names fall back to the dependency name itself.
+    set(_target "${DEPENDENCY_NAME}")
+    list(FIND _known_names "${DEPENDENCY_NAME}" _name_index)
+    if(NOT _name_index EQUAL -1)
+        list(GET _known_targets ${_name_index} _target)
+    endif()
+
+    set(${OUT_VAR} "${_target}" PARENT_SCOPE)
+endfunction()
+
+# paimon_record_dependency_resolution(<dependency> <requested> <actual> <target>)
+#
+# Remembers how a dependency was resolved so it can be reported later.
+# The dependency name is added (without duplicates) to the global property
+# PAIMON_RESOLVED_DEPENDENCIES, and the requested source, actual source,
+# search root and target name are stored in the global properties
+# PAIMON_<dependency>_REQUESTED_SOURCE, _ACTUAL_SOURCE, _ROOT and _TARGET.
+function(paimon_record_dependency_resolution
+         DEPENDENCY_NAME
+         REQUESTED_SOURCE
+         ACTUAL_SOURCE
+         TARGET_NAME)
+    get_property(_dependencies GLOBAL PROPERTY PAIMON_RESOLVED_DEPENDENCIES)
+    list(APPEND _dependencies "${DEPENDENCY_NAME}")
+    list(REMOVE_DUPLICATES _dependencies)
+    set_property(GLOBAL PROPERTY PAIMON_RESOLVED_DEPENDENCIES "${_dependencies}")
+
+    paimon_get_dependency_root("${DEPENDENCY_NAME}" _root)
+    set_property(GLOBAL PROPERTY "PAIMON_${DEPENDENCY_NAME}_REQUESTED_SOURCE"
+                                 "${REQUESTED_SOURCE}")
+    set_property(GLOBAL PROPERTY "PAIMON_${DEPENDENCY_NAME}_ACTUAL_SOURCE"
+                                 "${ACTUAL_SOURCE}")
+    set_property(GLOBAL PROPERTY "PAIMON_${DEPENDENCY_NAME}_ROOT" "${_root}")
+    set_property(GLOBAL PROPERTY "PAIMON_${DEPENDENCY_NAME}_TARGET" "${TARGET_NAME}")
+endfunction()
+
+# paimon_print_dependency_resolution_summary()
+#
+# Prints one status line per dependency listed in the global property
+# PAIMON_RESOLVED_DEPENDENCIES, showing its requested source, actual source,
+# target and search root. Prints nothing when no dependency was recorded.
+function(paimon_print_dependency_resolution_summary)
+    get_property(_dependencies GLOBAL PROPERTY PAIMON_RESOLVED_DEPENDENCIES)
+    if(NOT _dependencies)
+        return()
+    endif()
+
+    message(STATUS "Dependency resolution summary:")
+    foreach(_dependency IN LISTS _dependencies)
+        get_property(_requested GLOBAL PROPERTY "PAIMON_${_dependency}_REQUESTED_SOURCE")
+        get_property(_actual GLOBAL PROPERTY "PAIMON_${_dependency}_ACTUAL_SOURCE")
+        get_property(_root GLOBAL PROPERTY "PAIMON_${_dependency}_ROOT")
+        get_property(_target GLOBAL PROPERTY "PAIMON_${_dependency}_TARGET")
+        message(STATUS "  ${_dependency}: requested=${_requested}, actual=${_actual}, target=${_target}, root=${_root}"
+        )
+    endforeach()
+endfunction()
+
+# paimon_build_dependency(<dependency>)
+#
+# Dispatches to the build_<name>() command that builds the bundled copy of
+# the given dependency. The name comparison is case-sensitive; a name without
+# a rule is a fatal error.
+# This is a macro, so the invoked build_* command runs in the caller's scope.
+macro(paimon_build_dependency DEPENDENCY_NAME)
+    if("${DEPENDENCY_NAME}" STREQUAL "Arrow")
+        build_arrow()
+    elseif("${DEPENDENCY_NAME}" STREQUAL "fmt")
+        build_fmt()
+    elseif("${DEPENDENCY_NAME}" STREQUAL "RapidJSON")
+        build_rapidjson()
+    elseif("${DEPENDENCY_NAME}" STREQUAL "zstd")
+        build_zstd()
+    elseif("${DEPENDENCY_NAME}" STREQUAL "Snappy")
+        build_snappy()
+    elseif("${DEPENDENCY_NAME}" STREQUAL "LZ4")
+        build_lz4()
+    elseif("${DEPENDENCY_NAME}" STREQUAL "ZLIB")
+        build_zlib()
+    elseif("${DEPENDENCY_NAME}" STREQUAL "RE2")
+        build_re2()
+    elseif("${DEPENDENCY_NAME}" STREQUAL "Protobuf")
+        build_protobuf()
+    elseif("${DEPENDENCY_NAME}" STREQUAL "ORC")
+        build_orc()
+    elseif("${DEPENDENCY_NAME}" STREQUAL "TBB")
+        build_tbb()
+    elseif("${DEPENDENCY_NAME}" STREQUAL "glog")
+        build_glog()
+    elseif("${DEPENDENCY_NAME}" STREQUAL "Avro")
+        build_avro()
+    elseif("${DEPENDENCY_NAME}" STREQUAL "GTest")
+        build_gtest()
+    else()
+        # No build_* rule is wired up for this name.
+        message(FATAL_ERROR "No bundled build rule for ${DEPENDENCY_NAME}")
+    endif()
+endmacro()
+
+# resolve_dependency(<dependency> [FIND_PACKAGE_NAME <name>])
+#
+# Makes a dependency available according to its requested source:
+#   BUNDLED - build it with paimon_build_dependency();
+#   SYSTEM  - require the find module <name>Alt;
+#   AUTO    - probe <name>Alt quietly and fall back to the bundled build.
+# <name> is FIND_PACKAGE_NAME when given, otherwise the dependency name.
+# The outcome is stored in the cache variable PAIMON_<dependency>_ACTUAL_SOURCE
+# and recorded with paimon_record_dependency_resolution().
+# This is a macro: it runs in the caller's scope, which is why its helper
+# variables are unset again at the end.
+macro(resolve_dependency DEPENDENCY_NAME)
+    set(options)
+    set(one_value_args FIND_PACKAGE_NAME)
+    set(multi_value_args)
+    cmake_parse_arguments(ARG
+                          "${options}"
+                          "${one_value_args}"
+                          "${multi_value_args}"
+                          ${ARGN})
+
+    if(ARG_FIND_PACKAGE_NAME)
+        set(_paimon_find_package_name "${ARG_FIND_PACKAGE_NAME}")
+    else()
+        set(_paimon_find_package_name "${DEPENDENCY_NAME}")
+    endif()
+    set(_paimon_alt_package_name "${_paimon_find_package_name}Alt")
+    set(_paimon_found_var "${_paimon_alt_package_name}_FOUND")
+
+    paimon_get_dependency_source(${DEPENDENCY_NAME} _paimon_requested_source)
+    paimon_configure_dependency_root(${DEPENDENCY_NAME} "${_paimon_requested_source}"
+                                     _paimon_resolved_source)
+    paimon_get_dependency_compat_target(${DEPENDENCY_NAME} _paimon_target_name)
+
+    if(_paimon_resolved_source STREQUAL "BUNDLED")
+        message(STATUS "Using bundled ${DEPENDENCY_NAME}")
+        paimon_build_dependency(${DEPENDENCY_NAME})
+        set(PAIMON_${DEPENDENCY_NAME}_ACTUAL_SOURCE
+            "BUNDLED"
+            CACHE INTERNAL "Actual source for ${DEPENDENCY_NAME}")
+        paimon_record_dependency_resolution(
+            ${DEPENDENCY_NAME} "${_paimon_requested_source}" "BUNDLED"
+            "${_paimon_target_name}")
+    elseif(_paimon_resolved_source STREQUAL "SYSTEM")
+        message(STATUS "Using system ${DEPENDENCY_NAME}")
+        find_package(${_paimon_alt_package_name} REQUIRED MODULE)
+        set(PAIMON_${DEPENDENCY_NAME}_ACTUAL_SOURCE
+            "${_paimon_requested_source}"
+            CACHE INTERNAL "Actual source for ${DEPENDENCY_NAME}")
+        paimon_record_dependency_resolution(
+            ${DEPENDENCY_NAME} "${_paimon_requested_source}"
+            "${_paimon_requested_source}" "${_paimon_target_name}")
+    elseif(_paimon_resolved_source STREQUAL "AUTO")
+        message(STATUS "Resolving ${DEPENDENCY_NAME} with AUTO source")
+        find_package(${_paimon_alt_package_name} QUIET MODULE)
+        if(${_paimon_found_var})
+            message(STATUS "Using system ${DEPENDENCY_NAME}")
+            set(PAIMON_${DEPENDENCY_NAME}_ACTUAL_SOURCE
+                "SYSTEM"
+                CACHE INTERNAL "Actual source for ${DEPENDENCY_NAME}")
+            paimon_record_dependency_resolution(
+                ${DEPENDENCY_NAME} "${_paimon_requested_source}" "SYSTEM"
+                "${_paimon_target_name}")
+        else()
+            message(STATUS "System ${DEPENDENCY_NAME} not found; using bundled")
+            paimon_build_dependency(${DEPENDENCY_NAME})
+            set(PAIMON_${DEPENDENCY_NAME}_ACTUAL_SOURCE
+                "BUNDLED"
+                CACHE INTERNAL "Actual source for ${DEPENDENCY_NAME}")
+            paimon_record_dependency_resolution(
+                ${DEPENDENCY_NAME} "${_paimon_requested_source}" "BUNDLED"
+                "${_paimon_target_name}")
+        endif()
+    else()
+        message(FATAL_ERROR "Unsupported source ${_paimon_resolved_source} "
+                            "for ${DEPENDENCY_NAME}")
+    endif()
+
+    # Do not leak the helper variables into the caller's scope.
+    unset(_paimon_find_package_name)
+    unset(_paimon_alt_package_name)
+    unset(_paimon_found_var)
+    unset(_paimon_requested_source)
+    unset(_paimon_resolved_source)
+    unset(_paimon_target_name)
+endmacro()
+
+# paimon_warn_if_mixed_arrow_dependencies()
+#
+# Emits a warning for each of zstd, Snappy, LZ4, ZLIB and RE2 whose recorded
+# PAIMON_<dependency>_ACTUAL_SOURCE differs from PAIMON_Arrow_ACTUAL_SOURCE.
+# Does nothing when Arrow's actual source is not defined; dependencies without
+# a recorded actual source are skipped.
+function(paimon_warn_if_mixed_arrow_dependencies)
+    if(NOT DEFINED PAIMON_Arrow_ACTUAL_SOURCE)
+        return()
+    endif()
+
+    foreach(_dependency
+            zstd
+            Snappy
+            LZ4
+            ZLIB
+            RE2)
+        if(DEFINED PAIMON_${_dependency}_ACTUAL_SOURCE
+           AND NOT "${PAIMON_${_dependency}_ACTUAL_SOURCE}" STREQUAL
+               "${PAIMON_Arrow_ACTUAL_SOURCE}")
+            message(WARNING "Arrow resolved from ${PAIMON_Arrow_ACTUAL_SOURCE}, but "
+                            "${_dependency} resolved from "
+                            "${PAIMON_${_dependency}_ACTUAL_SOURCE}. Mixing SYSTEM "
+                            "and BUNDLED dependencies can cause ABI conflicts.")
+        endif()
+    endforeach()
+endfunction()
+
+# build_lucene()
+#
+# Builds Lucene++ from source as the external project `lucene_ep` and exposes
+# it as the imported static library target `lucene`.
+# The external project depends on the `zlib` target and on the boost_* targets
+# listed below, so these must exist before this macro is called.
+# NOTE(review): BOOST_INCLUDE_DIR, BOOST_LIBRARY_DIR, BOOST_INSTALL,
+# ZLIB_INCLUDE_DIR, ZLIB_LIBRARIES and EP_COMMON_OPTIONS are not defined in
+# this part of the file; presumably earlier build steps set them - confirm.
+macro(build_lucene)
+    message(STATUS "Building lucene from source")
+
+    # Take the parent directory of zlib's include directory as the zlib root.
+    get_target_property(LUCENE_ZLIB_INCLUDE_DIR zlib INTERFACE_INCLUDE_DIRECTORIES)
+    get_filename_component(LUCENE_ZLIB_ROOT "${LUCENE_ZLIB_INCLUDE_DIR}" DIRECTORY)
+
+    set(LUCENE_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/lucene_ep-install")
+
+    set(LUCENE_CMAKE_CXX_FLAGS "-pthread")
+    if(PAIMON_USE_CXX11_ABI)
+        string(APPEND LUCENE_CMAKE_CXX_FLAGS " -D_GLIBCXX_USE_CXX11_ABI=1")
+    else()
+        string(APPEND LUCENE_CMAKE_CXX_FLAGS " -D_GLIBCXX_USE_CXX11_ABI=0")
+    endif()
+
+    # EP_COMMON_CMAKE_ARGS comes first so that the flag definitions after it
+    # take precedence over the common ones.
+    set(LUCENE_CMAKE_ARGS
+        ${EP_COMMON_CMAKE_ARGS}
+        "-DLUCENE_BUILD_SHARED=OFF"
+        "-DENABLE_TEST=OFF"
+        "-DCMAKE_C_FLAGS=-pthread"
+        "-DCMAKE_CXX_FLAGS=${LUCENE_CMAKE_CXX_FLAGS}"
+        "-DCMAKE_EXE_LINKER_FLAGS=-pthread"
+        "-DBoost_NO_BOOST_CMAKE=ON"
+        "-DBoost_NO_SYSTEM_PATHS=ON"
+        "-DBoost_USE_STATIC_LIBS=ON"
+        "-DBoost_INCLUDE_DIR=${BOOST_INCLUDE_DIR}"
+        "-DBoost_LIBRARY_DIR=${BOOST_LIBRARY_DIR}"
+        "-DBOOST_ROOT=${BOOST_INSTALL}"
+        "-DBoost_CHRONO_FOUND=TRUE"
+        "-DBoost_THREAD_FOUND=TRUE"
+        "-DZLIB_INCLUDE_DIRS=${ZLIB_INCLUDE_DIR}"
+        "-DZLIB_LIBRARY_RELEASE=${ZLIB_LIBRARIES}"
+        "-DZLIB_ROOT=${LUCENE_ZLIB_ROOT}"
+        "-DCMAKE_INSTALL_PREFIX=${LUCENE_PREFIX}")
+
+    set(LUCENE_LIB "${LUCENE_PREFIX}/lib/liblucene++.a")
+    externalproject_add(lucene_ep
+                        ${EP_COMMON_OPTIONS}
+                        URL ${LUCENE_SOURCE_URL}
+                        URL_HASH "SHA256=${PAIMON_LUCENE_BUILD_SHA256_CHECKSUM}"
+                        CMAKE_ARGS ${LUCENE_CMAKE_ARGS}
+                        BUILD_BYPRODUCTS ${LUCENE_LIB}
+                        DEPENDS zlib
+                                boost_date_time
+                                boost_filesystem
+                                boost_regex
+                                boost_thread
+                                boost_iostreams
+                                boost_system
+                                boost_chrono
+                                boost_atomic)
+
+    set(LUCENE_INCLUDE_DIR "${LUCENE_PREFIX}/include")
+    # The include directory must exist before it is referenced by a target.
+    file(MAKE_DIRECTORY "${LUCENE_INCLUDE_DIR}")
+    include_directories(SYSTEM ${LUCENE_INCLUDE_DIR} ${BOOST_INCLUDE_DIR})
+    add_library(lucene STATIC IMPORTED)
+    set_target_properties(lucene
+                          PROPERTIES IMPORTED_LOCATION "${LUCENE_LIB}"
+                                     INTERFACE_INCLUDE_DIRECTORIES
+                                     "${LUCENE_INCLUDE_DIR}")
+
+    target_link_libraries(lucene
+                          INTERFACE zlib
+                                    boost_date_time
+                                    boost_filesystem
+                                    boost_regex
+                                    boost_thread
+                                    boost_iostreams
+                                    boost_system
+                                    boost_chrono
+                                    boost_atomic
+                                    pthread
+                                    dl)
+    # The imported library only exists once the external project was built.
+    add_dependencies(lucene lucene_ep)
+endmacro()
+
+# Downloads limonp and jieba, then stages jieba's headers, jieba's dictionaries
+# and limonp's headers into one install tree that is exposed through the `jieba`
+# INTERFACE IMPORTED target.
+macro(build_jieba)
+    # limonp has its install step disabled; jieba_ep's install step copies its
+    # headers straight out of the limonp source tree instead.
+    message(STATUS "Building limonp from source")
+    set(LIMONP_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/limonp_ep-prefix")
+    externalproject_add(limonp_ep
+                        INSTALL_COMMAND ""
+                        URL ${LIMONP_SOURCE_URL}
+                        URL_HASH "SHA256=${PAIMON_LIMONP_BUILD_SHA256_CHECKSUM}")
+
+    message(STATUS "Building jieba from source")
+    set(JIEBA_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/jieba_ep-prefix")
+    set(JIEBA_INSTALL "${CMAKE_CURRENT_BINARY_DIR}/jieba_ep-install")
+    set(JIEBA_INCLUDE_DIR "${JIEBA_INSTALL}/include")
+    set(JIEBA_DICT_DIR "${JIEBA_INSTALL}/dict")
+    # Both directories are referenced by a target below, so they have to exist
+    # already at configure time.
+    file(MAKE_DIRECTORY ${JIEBA_INCLUDE_DIR} ${JIEBA_DICT_DIR})
+
+    set(JIEBA_CMAKE_ARGS
+        ${EP_COMMON_CMAKE_ARGS}
+        "-DENABLE_TEST=OFF"
+        "-DCPPJIEBA_TOP_LEVEL_PROJECT=OFF"
+        "-DCMAKE_INSTALL_PREFIX=${JIEBA_INSTALL}")
+
+    # The patch step drops a `.patched` marker inside the source tree so that
+    # re-running the step does not try to apply jieba.diff a second time.
+    set(PATCH_FILE "${CMAKE_CURRENT_LIST_DIR}/jieba.diff")
+    externalproject_add(jieba_ep
+                        ${EP_COMMON_OPTIONS}
+                        LOG_PATCH ON
+                        URL ${JIEBA_SOURCE_URL}
+                        URL_HASH "SHA256=${PAIMON_JIEBA_BUILD_SHA256_CHECKSUM}"
+                        PATCH_COMMAND
+                            ${CMAKE_COMMAND} -E chdir <SOURCE_DIR> bash -c
+                            "[ -f .patched ] && echo '<SOURCE_DIR> patch already applied, ignore...' || patch -s -N -p1 -i '${PATCH_FILE}' && touch .patched"
+                        CMAKE_ARGS ${JIEBA_CMAKE_ARGS}
+                        INSTALL_COMMAND
+                            bash -c
+                            "cp -r ${JIEBA_PREFIX}/src/jieba_ep/include/* ${JIEBA_INSTALL}/include/ && cp -r ${JIEBA_PREFIX}/src/jieba_ep/dict/* ${JIEBA_INSTALL}/dict/ && cp -r ${LIMONP_PREFIX}/src/limonp_ep/include/* ${JIEBA_INSTALL}/include/"
+    )
+
+    add_library(jieba INTERFACE IMPORTED)
+    target_include_directories(jieba SYSTEM INTERFACE ${JIEBA_INCLUDE_DIR}
+                                                      ${JIEBA_DICT_DIR})
+    include_directories(SYSTEM ${JIEBA_INCLUDE_DIR} ${JIEBA_DICT_DIR})
+
+    # limonp must be fetched before jieba_ep runs, because jieba_ep's install
+    # step reads the limonp source tree.
+    add_dependencies(jieba_ep limonp_ep)
+    add_dependencies(jieba jieba_ep)
+endmacro()
+
+# Builds RapidJSON through ExternalProject and exposes its installed headers as
+# the `RapidJSON` INTERFACE IMPORTED target.
+macro(build_rapidjson)
+    message(STATUS "Building RapidJSON from source")
+    set(RAPIDJSON_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/rapidjson_ep-install")
+    set(RAPIDJSON_INCLUDE_DIR "${RAPIDJSON_PREFIX}/include")
+    # The include directory must exist before it is referenced by a target.
+    file(MAKE_DIRECTORY "${RAPIDJSON_INCLUDE_DIR}")
+
+    # Documentation, examples and tests are switched off.
+    set(RAPIDJSON_CMAKE_ARGS
+        ${EP_COMMON_CMAKE_ARGS}
+        -DRAPIDJSON_BUILD_DOC=OFF
+        -DRAPIDJSON_BUILD_EXAMPLES=OFF
+        -DRAPIDJSON_BUILD_TESTS=OFF
+        "-DCMAKE_INSTALL_PREFIX=${RAPIDJSON_PREFIX}")
+
+    externalproject_add(rapidjson_ep
+                        ${EP_COMMON_OPTIONS}
+                        CMAKE_ARGS ${RAPIDJSON_CMAKE_ARGS}
+                        URL ${RAPIDJSON_SOURCE_URL}
+                        URL_HASH "SHA256=${PAIMON_RAPIDJSON_BUILD_SHA256_CHECKSUM}")
+
+    add_library(RapidJSON INTERFACE IMPORTED)
+    set_target_properties(RapidJSON PROPERTIES INTERFACE_INCLUDE_DIRECTORIES
+                                               "${RAPIDJSON_INCLUDE_DIR}")
+    add_dependencies(RapidJSON rapidjson_ep)
+    include_directories(SYSTEM ${RAPIDJSON_INCLUDE_DIR})
+endmacro()
+
+# Builds fmt as a static library through ExternalProject and exposes it as the
+# `fmt` STATIC IMPORTED target.
+#
+# Reads:  EP_CXX_FLAGS, EP_C_FLAGS, EP_COMMON_CMAKE_ARGS, UPPERCASE_BUILD_TYPE,
+#         FMT_SOURCE_URL, PAIMON_FMT_BUILD_SHA256_CHECKSUM, THIRDPARTY_LOG_OPTIONS.
+# Sets (in the caller's scope, since this is a macro): FMT_PREFIX,
+#         FMT_INCLUDE_DIR, FMT_STATIC_LIB, FMT_LIBRARIES and the FMT_CMAKE_* helpers.
+macro(build_fmt)
+    message(STATUS "Building fmt from source")
+    set(FMT_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/fmt_ep-install")
+    set(FMT_INCLUDE_DIR "${FMT_PREFIX}/include")
+    # In Debug the installed archive is expected to be named with a "d" suffix.
+    # The left operand is quoted so that if() compares the literal build-type
+    # string instead of dereferencing a variable that happens to be named DEBUG.
+    if("${UPPERCASE_BUILD_TYPE}" STREQUAL "DEBUG")
+        set(FMT_LIB_SUFFIX "d")
+    else()
+        set(FMT_LIB_SUFFIX "")
+    endif()
+    set(FMT_STATIC_LIB_NAME fmt)
+    set(FMT_STATIC_LIB
+        "${FMT_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}${FMT_STATIC_LIB_NAME}${FMT_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}"
+    )
+    set(FMT_LIBRARIES ${FMT_STATIC_LIB})
+
+    # Warnings must not fail the third-party build: append -Wno-error and drop
+    # any -Werror inherited from the common C++ flags. The input is quoted so
+    # the flag string reaches string(REPLACE) as one argument.
+    set(FMT_CMAKE_CXX_FLAGS "${EP_CXX_FLAGS} -Wno-error")
+    set(FMT_CMAKE_C_FLAGS "${EP_C_FLAGS} -Wno-error")
+    string(REPLACE "-Werror" "" FMT_CMAKE_CXX_FLAGS "${FMT_CMAKE_CXX_FLAGS}")
+
+    set(FMT_CMAKE_ARGS
+        ${EP_COMMON_CMAKE_ARGS}
+        -DCMAKE_INSTALL_PREFIX=${FMT_PREFIX}
+        "-DCMAKE_CXX_FLAGS=${FMT_CMAKE_CXX_FLAGS}"
+        "-DCMAKE_C_FLAGS=${FMT_CMAKE_C_FLAGS}"
+        -DFMT_TEST=OFF
+        -DFMT_DOC=OFF)
+    set(FMT_CONFIGURE CMAKE_ARGS ${FMT_CMAKE_ARGS})
+    externalproject_add(fmt_ep
+                        URL ${FMT_SOURCE_URL}
+                        URL_HASH "SHA256=${PAIMON_FMT_BUILD_SHA256_CHECKSUM}"
+                        ${FMT_CONFIGURE} ${THIRDPARTY_LOG_OPTIONS}
+                        BUILD_BYPRODUCTS ${FMT_STATIC_LIB})
+
+    # The include directory must exist before it is referenced by a target.
+    file(MAKE_DIRECTORY "${FMT_INCLUDE_DIR}")
+
+    include_directories(SYSTEM ${FMT_INCLUDE_DIR})
+    add_library(fmt STATIC IMPORTED)
+    set_target_properties(fmt PROPERTIES IMPORTED_LOCATION ${FMT_STATIC_LIB})
+    target_include_directories(fmt INTERFACE ${FMT_INCLUDE_DIR})
+    add_dependencies(fmt fmt_ep)
+endmacro()
+
+# Builds a fixed set of Boost libraries (static, release variant) with
+# bootstrap.sh + b2 and exposes each one as a `boost_<component>` STATIC
+# IMPORTED target that depends on boost_ep.
+macro(build_boost)
+    message(STATUS "Building boost from source")
+    set(BOOST_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/boost_ep-prefix")
+    set(BOOST_INSTALL "${CMAKE_CURRENT_BINARY_DIR}/boost_ep-install")
+    set(BOOST_INCLUDE_DIR "${BOOST_INSTALL}/include")
+    set(BOOST_LIBRARY_DIR ${BOOST_INSTALL}/lib)
+    # Created at configure time because the imported targets below reference them.
+    file(MAKE_DIRECTORY ${BOOST_INCLUDE_DIR} ${BOOST_LIBRARY_DIR})
+
+    set(BOOST_BYPRODUCTS
+        ${BOOST_LIBRARY_DIR}/libboost_date_time.a
+        ${BOOST_LIBRARY_DIR}/libboost_filesystem.a
+        ${BOOST_LIBRARY_DIR}/libboost_system.a
+        ${BOOST_LIBRARY_DIR}/libboost_regex.a
+        ${BOOST_LIBRARY_DIR}/libboost_thread.a
+        ${BOOST_LIBRARY_DIR}/libboost_atomic.a
+        ${BOOST_LIBRARY_DIR}/libboost_chrono.a
+        ${BOOST_LIBRARY_DIR}/libboost_iostreams.a)
+
+    # Position-independent code, with the libstdc++ ABI macro following
+    # PAIMON_USE_CXX11_ABI.
+    if(PAIMON_USE_CXX11_ABI)
+        set(BOOST_CXX_FLAGS "-fPIC -D_GLIBCXX_USE_CXX11_ABI=1")
+    else()
+        set(BOOST_CXX_FLAGS "-fPIC -D_GLIBCXX_USE_CXX11_ABI=0")
+    endif()
+
+    # b2 is invoked with `install` in the build step; the install step then
+    # copies the per-library header trees from the source tree into
+    # ${BOOST_INSTALL}/include/boost.
+    externalproject_add(boost_ep
+                        URL "${THIRDPARTY_DIR}/boost/${PAIMON_BOOST_PKG_NAME}"
+                        URL_HASH "SHA256=${PAIMON_BOOST_BUILD_SHA256_CHECKSUM}"
+                        DEPENDS zlib
+                        BUILD_IN_SOURCE TRUE
+                        LOG_DOWNLOAD ON
+                        LOG_CONFIGURE ON
+                        LOG_BUILD ON
+                        CONFIGURE_COMMAND
+                            ${BOOST_PREFIX}/src/boost_ep/bootstrap.sh
+                            --with-libraries=date_time,filesystem,iostreams,regex,system,thread,chrono,atomic
+                        BUILD_COMMAND
+                            ${BOOST_PREFIX}/src/boost_ep/b2
+                            --prefix=${BOOST_INSTALL}
+                            --libdir=${BOOST_LIBRARY_DIR}
+                            link=static
+                            -sZLIB_INCLUDE=${ZLIB_INCLUDE_DIR}
+                            -sZLIB_LIBRARY_PATH=${ZLIB_PREFIX}/lib
+                            runtime-link=shared
+                            threading=multi
+                            variant=release
+                            cxxflags=${BOOST_CXX_FLAGS}
+                            install
+                        INSTALL_COMMAND
+                            bash -c
+                            "mkdir -p ${BOOST_INSTALL}/include/boost && cp -r ${BOOST_PREFIX}/src/boost_ep/libs/*/include/boost/* ${BOOST_INSTALL}/include/boost && cp -r ${BOOST_PREFIX}/src/boost_ep/libs/*/*/include/boost/* ${BOOST_INSTALL}/include/boost"
+                        BUILD_BYPRODUCTS ${BOOST_BYPRODUCTS})
+
+    include_directories(SYSTEM ${BOOST_INCLUDE_DIR})
+
+    # One imported static library per built component, each ordered after boost_ep.
+    foreach(_boost_component
+            atomic
+            chrono
+            date_time
+            filesystem
+            regex
+            thread
+            iostreams
+            system)
+        add_library(boost_${_boost_component} STATIC IMPORTED)
+        set_target_properties(boost_${_boost_component}
+                              PROPERTIES IMPORTED_LOCATION
+                                         ${BOOST_LIBRARY_DIR}/libboost_${_boost_component}.a
+                                         INTERFACE_INCLUDE_DIRECTORIES ${BOOST_INCLUDE_DIR})
+        add_dependencies(boost_${_boost_component} boost_ep)
+    endforeach()
+endmacro()
+
+# Builds RE2 as a static library through ExternalProject and exposes it as the
+# `re2::re2` STATIC IMPORTED target.
+macro(build_re2)
+    message(STATUS "Building RE2 from source")
+    set(RE2_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/re2_ep-install")
+    set(RE2_INCLUDE_DIR "${RE2_PREFIX}/include")
+    set(RE2_STATIC_LIB
+        "${RE2_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}re2${CMAKE_STATIC_LIBRARY_SUFFIX}"
+    )
+    set(RE2_LIBRARIES ${RE2_STATIC_LIB})
+    # The include directory must exist before it is referenced by a target.
+    file(MAKE_DIRECTORY "${RE2_INCLUDE_DIR}")
+
+    set(RE2_CMAKE_ARGS
+        ${EP_COMMON_CMAKE_ARGS}
+        "-DCMAKE_INSTALL_PREFIX=${RE2_PREFIX}")
+
+    externalproject_add(re2_ep
+                        ${EP_COMMON_OPTIONS}
+                        URL ${RE2_SOURCE_URL}
+                        URL_HASH "SHA256=${PAIMON_RE2_BUILD_SHA256_CHECKSUM}"
+                        INSTALL_DIR ${RE2_PREFIX}
+                        ${THIRDPARTY_LOG_OPTIONS}
+                        CMAKE_ARGS ${RE2_CMAKE_ARGS}
+                        BUILD_BYPRODUCTS "${RE2_STATIC_LIB}")
+
+    add_library(re2::re2 STATIC IMPORTED)
+    set_target_properties(re2::re2
+                          PROPERTIES IMPORTED_LOCATION "${RE2_STATIC_LIB}"
+                                     INTERFACE_INCLUDE_DIRECTORIES "${RE2_INCLUDE_DIR}")
+    add_dependencies(re2::re2 re2_ep)
+    include_directories(SYSTEM ${RE2_INCLUDE_DIR})
+endmacro()
+
+# Builds snappy as a static library (tests and benchmarks off) through
+# ExternalProject and exposes it as the `snappy` STATIC IMPORTED target.
+macro(build_snappy)
+    message(STATUS "Building snappy from source")
+    set(SNAPPY_HOME "${CMAKE_CURRENT_BINARY_DIR}/snappy_ep-install")
+    set(SNAPPY_INCLUDE_DIR "${SNAPPY_HOME}/include")
+    set(SNAPPY_STATIC_LIB
+        "${SNAPPY_HOME}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}snappy${CMAKE_STATIC_LIBRARY_SUFFIX}"
+    )
+    set(SNAPPY_LIBRARIES ${SNAPPY_STATIC_LIB})
+    # The include directory must exist before it is referenced by a target.
+    file(MAKE_DIRECTORY "${SNAPPY_INCLUDE_DIR}")
+
+    set(SNAPPY_CMAKE_ARGS
+        ${EP_COMMON_CMAKE_ARGS}
+        -DCMAKE_INSTALL_PREFIX=${SNAPPY_HOME}
+        -DSNAPPY_BUILD_TESTS=OFF
+        -DSNAPPY_BUILD_BENCHMARKS=OFF)
+
+    externalproject_add(snappy_ep
+                        URL ${SNAPPY_SOURCE_URL}
+                        URL_HASH "SHA256=${PAIMON_SNAPPY_BUILD_SHA256_CHECKSUM}"
+                        ${THIRDPARTY_LOG_OPTIONS}
+                        CMAKE_ARGS ${SNAPPY_CMAKE_ARGS}
+                        BUILD_BYPRODUCTS "${SNAPPY_STATIC_LIB}")
+
+    add_library(snappy STATIC IMPORTED)
+    set_target_properties(snappy
+                          PROPERTIES IMPORTED_LOCATION ${SNAPPY_STATIC_LIB}
+                                     INTERFACE_INCLUDE_DIRECTORIES ${SNAPPY_INCLUDE_DIR})
+    add_dependencies(snappy snappy_ep)
+    include_directories(SYSTEM ${SNAPPY_INCLUDE_DIR})
+endmacro()
+
+# Builds zlib through ExternalProject and exposes the static archive as the
+# `zlib` STATIC IMPORTED target.
+macro(build_zlib)
+    message(STATUS "Building zlib from source")
+    set(ZLIB_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/zlib_ep-install")
+    set(ZLIB_INCLUDE_DIR "${ZLIB_PREFIX}/include")
+    set(ZLIB_STATIC_LIB_NAME z)
+    set(ZLIB_STATIC_LIB
+        "${ZLIB_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}${ZLIB_STATIC_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}"
+    )
+    set(ZLIB_LIBRARIES ${ZLIB_STATIC_LIB})
+    # The include directory must exist before it is referenced by a target.
+    file(MAKE_DIRECTORY "${ZLIB_INCLUDE_DIR}")
+
+    set(ZLIB_CMAKE_ARGS
+        ${EP_COMMON_CMAKE_ARGS}
+        -DCMAKE_INSTALL_PREFIX=${ZLIB_PREFIX})
+
+    externalproject_add(zlib_ep
+                        URL ${ZLIB_SOURCE_URL}
+                        URL_HASH "SHA256=${PAIMON_ZLIB_BUILD_SHA256_CHECKSUM}"
+                        ${THIRDPARTY_LOG_OPTIONS}
+                        CMAKE_ARGS ${ZLIB_CMAKE_ARGS}
+                        BUILD_BYPRODUCTS "${ZLIB_STATIC_LIB}")
+
+    add_library(zlib STATIC IMPORTED)
+    set_target_properties(zlib
+                          PROPERTIES IMPORTED_LOCATION ${ZLIB_STATIC_LIB}
+                                     INTERFACE_INCLUDE_DIRECTORIES ${ZLIB_INCLUDE_DIR})
+    add_dependencies(zlib zlib_ep)
+    include_directories(SYSTEM ${ZLIB_INCLUDE_DIR})
+endmacro()
+
+# Builds zstd as a static library (shared library and CLI programs off) from the
+# `build/cmake` subdirectory of its source tree and exposes it as the `zstd`
+# STATIC IMPORTED target.
+#
+# Reads:  EP_CXX_FLAGS, EP_C_FLAGS, EP_COMMON_CMAKE_ARGS, ZSTD_SOURCE_URL,
+#         PAIMON_ZSTD_BUILD_SHA256_CHECKSUM, THIRDPARTY_LOG_OPTIONS.
+# Sets (in the caller's scope, since this is a macro): ZSTD_PREFIX,
+#         ZSTD_INCLUDE_DIR, ZSTD_STATIC_LIB, ZSTD_LIBRARIES and the ZSTD_CMAKE_* helpers.
+macro(build_zstd)
+    message(STATUS "Building zstd from source")
+    set(ZSTD_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/zstd_ep-install")
+    set(ZSTD_INCLUDE_DIR "${ZSTD_PREFIX}/include")
+    set(ZSTD_STATIC_LIB_NAME zstd)
+    set(ZSTD_STATIC_LIB
+        "${ZSTD_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}${ZSTD_STATIC_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}"
+    )
+    set(ZSTD_LIBRARIES ${ZSTD_STATIC_LIB})
+
+    # Warnings must not fail the third-party build: append -Wno-error and drop
+    # any -Werror inherited from the common C++ flags. The input is quoted so
+    # the flag string reaches string(REPLACE) as one argument.
+    set(ZSTD_CMAKE_CXX_FLAGS "${EP_CXX_FLAGS} -Wno-error")
+    set(ZSTD_CMAKE_C_FLAGS "${EP_C_FLAGS} -Wno-error")
+    string(REPLACE "-Werror" "" ZSTD_CMAKE_CXX_FLAGS "${ZSTD_CMAKE_CXX_FLAGS}")
+
+    set(ZSTD_CMAKE_ARGS
+        ${EP_COMMON_CMAKE_ARGS}
+        -DCMAKE_INSTALL_PREFIX=${ZSTD_PREFIX}
+        "-DCMAKE_CXX_FLAGS=${ZSTD_CMAKE_CXX_FLAGS}"
+        "-DCMAKE_C_FLAGS=${ZSTD_CMAKE_C_FLAGS}"
+        -DZSTD_BUILD_SHARED=OFF
+        -DZSTD_BUILD_PROGRAMS=OFF)
+
+    # The CMakeLists.txt used for the build lives under build/cmake.
+    set(ZSTD_CONFIGURE SOURCE_SUBDIR "build/cmake" CMAKE_ARGS ${ZSTD_CMAKE_ARGS})
+    externalproject_add(zstd_ep
+                        URL ${ZSTD_SOURCE_URL}
+                        URL_HASH "SHA256=${PAIMON_ZSTD_BUILD_SHA256_CHECKSUM}"
+                        ${ZSTD_CONFIGURE} ${THIRDPARTY_LOG_OPTIONS}
+                        BUILD_BYPRODUCTS ${ZSTD_STATIC_LIB})
+
+    # The include directory must exist before it is referenced by a target.
+    file(MAKE_DIRECTORY "${ZSTD_INCLUDE_DIR}")
+
+    include_directories(SYSTEM ${ZSTD_INCLUDE_DIR})
+    add_library(zstd STATIC IMPORTED)
+    set_target_properties(zstd PROPERTIES IMPORTED_LOCATION ${ZSTD_STATIC_LIB})
+    target_include_directories(zstd INTERFACE ${ZSTD_INCLUDE_DIR})
+    add_dependencies(zstd zstd_ep)
+endmacro()
+
+# Builds lz4 as a static library (CLI and legacy lz4c off) from the
+# `build/cmake` subdirectory of its source tree and exposes it as the `lz4`
+# STATIC IMPORTED target.
+macro(build_lz4)
+    message(STATUS "Building lz4 from source")
+    set(LZ4_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/lz4_ep-install")
+    set(LZ4_INCLUDE_DIR "${LZ4_PREFIX}/include")
+    set(LZ4_STATIC_LIB
+        "${LZ4_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}lz4${CMAKE_STATIC_LIBRARY_SUFFIX}"
+    )
+    set(LZ4_LIBRARIES ${LZ4_STATIC_LIB})
+    # The include directory must exist before it is referenced by a target.
+    file(MAKE_DIRECTORY "${LZ4_INCLUDE_DIR}")
+
+    set(LZ4_CMAKE_ARGS
+        ${EP_COMMON_CMAKE_ARGS}
+        -DCMAKE_INSTALL_PREFIX=${LZ4_PREFIX}
+        -DLZ4_BUILD_CLI=OFF
+        -DLZ4_BUILD_LEGACY_LZ4C=OFF)
+    set(LZ4_CONFIGURE SOURCE_SUBDIR "build/cmake" CMAKE_ARGS ${LZ4_CMAKE_ARGS})
+
+    externalproject_add(lz4_ep
+                        URL ${LZ4_SOURCE_URL}
+                        URL_HASH "SHA256=${PAIMON_LZ4_BUILD_SHA256_CHECKSUM}"
+                        ${LZ4_CONFIGURE}
+                        ${THIRDPARTY_LOG_OPTIONS}
+                        BUILD_BYPRODUCTS ${LZ4_STATIC_LIB})
+
+    add_library(lz4 STATIC IMPORTED)
+    set_target_properties(lz4
+                          PROPERTIES IMPORTED_LOCATION ${LZ4_STATIC_LIB}
+                                     INTERFACE_INCLUDE_DIRECTORIES ${LZ4_INCLUDE_DIR})
+    add_dependencies(lz4 lz4_ep)
+    include_directories(SYSTEM ${LZ4_INCLUDE_DIR})
+endmacro()
+
+# Unpacks the precompiled jindosdk-c package, copies its headers and shared
+# library into a staging tree, exposes it as the `jindosdk::c_sdk` SHARED
+# IMPORTED target and installs the shared library files with the project.
+macro(build_jindosdk_c)
+    message(STATUS "Building jindosdk-c from precompiled package")
+
+    set(JINDOSDK_C_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/jindosdk_ep-install")
+    set(JINDOSDK_C_HOME "${JINDOSDK_C_PREFIX}")
+    set(JINDOSDK_C_INCLUDE_DIR "${JINDOSDK_C_PREFIX}/include")
+    set(JINDOSDK_C_LIB_DIR "${JINDOSDK_C_PREFIX}/lib/native")
+    set(JINDOSDK_C_DYNAMIC_LIB "${JINDOSDK_C_LIB_DIR}/${JINDOSDK_C_DYNAMIC_LIB_FILE}")
+
+    # The include directory must exist before it is referenced by a target; the
+    # library directory is the destination of the second install command below.
+    file(MAKE_DIRECTORY "${JINDOSDK_C_INCLUDE_DIR}")
+    file(MAKE_DIRECTORY "${JINDOSDK_C_LIB_DIR}")
+
+    # Nothing is configured or compiled; the install step only copies files out
+    # of the unpacked archive (headers first, then the shared library files).
+    externalproject_add(jindosdk_ep
+                        ${THIRDPARTY_LOG_OPTIONS}
+                        URL ${JINDOSDK_C_SOURCE_URL}
+                        URL_HASH "SHA256=${JINDOSDK_C_BUILD_SHA256_CHECKSUM}"
+                        BUILD_BYPRODUCTS "${JINDOSDK_C_DYNAMIC_LIB}"
+                        CONFIGURE_COMMAND ""
+                        BUILD_COMMAND ""
+                        INSTALL_COMMAND
+                            bash -c "cp -r <SOURCE_DIR>/include/* ${JINDOSDK_C_INCLUDE_DIR}"
+                        COMMAND
+                            bash -c
+                            "cp -r <SOURCE_DIR>/lib/native/${JINDOSDK_C_DYNAMIC_LIB_FILE}* ${JINDOSDK_C_LIB_DIR}"
+    )
+
+    add_library(jindosdk::c_sdk SHARED IMPORTED)
+    set_target_properties(jindosdk::c_sdk
+                          PROPERTIES INTERFACE_INCLUDE_DIRECTORIES
+                                     "${JINDOSDK_C_INCLUDE_DIR}"
+                                     IMPORTED_LOCATION "${JINDOSDK_C_DYNAMIC_LIB}")
+    add_dependencies(jindosdk::c_sdk jindosdk_ep)
+    list(APPEND JINDOSDK_INCLUDE_DIR ${JINDOSDK_C_INCLUDE_DIR})
+
+    # Ship every staged file whose name starts with the shared library file name.
+    install(DIRECTORY "${JINDOSDK_C_LIB_DIR}/"
+            DESTINATION ${CMAKE_INSTALL_LIBDIR}
+            FILES_MATCHING
+            PATTERN "${JINDOSDK_C_DYNAMIC_LIB_FILE}*")
+
+endmacro()
+
+# Builds the in-tree third_party/jindosdk-nextarch sources into a static library
+# through ExternalProject and exposes it as the `jindosdk::nextarch` STATIC
+# IMPORTED target, which links against `jindosdk::c_sdk`.
+macro(build_jindosdk_nextarch)
+    message(STATUS "Building jindosdk-nextarch from local source")
+
+    set(JINDOSDK_NEXTARCH_SOURCE_DIR "${CMAKE_SOURCE_DIR}/third_party/jindosdk-nextarch")
+    set(JINDOSDK_NEXTARCH_PREFIX
+        "${CMAKE_CURRENT_BINARY_DIR}/jindosdk-nextarch_ep-install")
+    set(JINDOSDK_NEXTARCH_HOME "${JINDOSDK_NEXTARCH_PREFIX}")
+    set(JINDOSDK_NEXTARCH_INCLUDE_DIR "${JINDOSDK_NEXTARCH_PREFIX}/include")
+    set(JINDOSDK_NEXTARCH_LIB_DIR "${JINDOSDK_NEXTARCH_PREFIX}/lib")
+    set(JINDOSDK_NEXTARCH_STATIC_LIB
+        "${JINDOSDK_NEXTARCH_LIB_DIR}/libjindosdk-nextarch.a")
+
+    # The include directory must exist before it is referenced by a target.
+    file(MAKE_DIRECTORY "${JINDOSDK_NEXTARCH_INCLUDE_DIR}")
+    file(MAKE_DIRECTORY "${JINDOSDK_NEXTARCH_LIB_DIR}")
+
+    # Read the jindosdk C SDK locations back from its imported target; the SDK
+    # root handed to the sub-build is the parent of its include directory.
+    get_target_property(JINDOSDK_C_INCLUDE_DIR jindosdk::c_sdk
+                        INTERFACE_INCLUDE_DIRECTORIES)
+    get_target_property(JINDOSDK_C_LIBRARY_LOCATION jindosdk::c_sdk IMPORTED_LOCATION)
+    get_filename_component(JINDOSDK_C_DIR_ROOT "${JINDOSDK_C_INCLUDE_DIR}" DIRECTORY)
+
+    # The common external-project compiler flags are passed through unchanged.
+    set(JINDOSDK_NEXTARCH_CMAKE_CXX_FLAGS "${EP_CXX_FLAGS}")
+    set(JINDOSDK_NEXTARCH_CMAKE_C_FLAGS "${EP_C_FLAGS}")
+    set(JINDOSDK_NEXTARCH_CMAKE_ARGS
+        ${EP_COMMON_CMAKE_ARGS}
+        "-DCMAKE_INSTALL_PREFIX=${JINDOSDK_NEXTARCH_PREFIX}"
+        "-DCMAKE_CXX_FLAGS=${JINDOSDK_NEXTARCH_CMAKE_CXX_FLAGS}"
+        "-DCMAKE_C_FLAGS=${JINDOSDK_NEXTARCH_CMAKE_C_FLAGS}"
+        -DJINDOSDK_ROOT=${JINDOSDK_C_DIR_ROOT}
+        -DJINDOSDK_LIBRARY_NAME=${JINDOSDK_C_DYNAMIC_LIB_NAME})
+
+    externalproject_add(jindosdk-nextarch_ep
+                        ${THIRDPARTY_LOG_OPTIONS}
+                        SOURCE_DIR ${JINDOSDK_NEXTARCH_SOURCE_DIR}
+                        DEPENDS jindosdk::c_sdk
+                        CMAKE_ARGS ${JINDOSDK_NEXTARCH_CMAKE_ARGS}
+                        BUILD_BYPRODUCTS "${JINDOSDK_NEXTARCH_STATIC_LIB}")
+
+    add_library(jindosdk::nextarch STATIC IMPORTED)
+    set_target_properties(jindosdk::nextarch
+                          PROPERTIES INTERFACE_INCLUDE_DIRECTORIES
+                                     "${JINDOSDK_NEXTARCH_INCLUDE_DIR}"
+                                     IMPORTED_LOCATION "${JINDOSDK_NEXTARCH_STATIC_LIB}")
+    target_link_libraries(jindosdk::nextarch
+                          INTERFACE jindosdk::c_sdk
+                                    pthread
+                                    dl)
+    add_dependencies(jindosdk::nextarch jindosdk-nextarch_ep)
+    list(APPEND JINDOSDK_INCLUDE_DIR ${JINDOSDK_NEXTARCH_INCLUDE_DIR})
+endmacro()
+
+# Builds protobuf (libprotobuf, libprotoc and the protoc compiler) through
+# ExternalProject against the in-tree zlib, and exposes:
+#   libprotobuf - STATIC IMPORTED library
+#   libprotoc   - STATIC IMPORTED library
+#   protoc      - IMPORTED executable
+# All three targets are ordered after protobuf_ep, and all three files are
+# declared as byproducts so generators such as Ninja know which step produces
+# them.
+#
+# Reads:  EP_C_FLAGS, EP_CXX_FLAGS, EP_COMMON_CMAKE_ARGS, PROTOBUF_SOURCE_URL,
+#         PAIMON_PROTOBUF_BUILD_SHA256_CHECKSUM, THIRDPARTY_LOG_OPTIONS and the
+#         `zlib` target.
+# Sets (in the caller's scope, since this is a macro): PROTOBUF_PREFIX,
+#         PROTOBUF_INCLUDE_DIR, PROTOBUF_STATIC_LIB, PROTOC_STATIC_LIB,
+#         PROTOBUF_LIBRARIES, PROTOBUF_COMPILER and helper variables.
+macro(build_protobuf)
+    message(STATUS "Building protobuf from source")
+    set(PROTOBUF_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/protobuf_ep-install")
+    set(PROTOBUF_INCLUDE_DIR "${PROTOBUF_PREFIX}/include")
+    set(PROTOBUF_STATIC_LIB
+        "${PROTOBUF_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}protobuf${CMAKE_STATIC_LIBRARY_SUFFIX}"
+    )
+    set(PROTOC_STATIC_LIB
+        "${PROTOBUF_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}protoc${CMAKE_STATIC_LIBRARY_SUFFIX}"
+    )
+    set(PROTOBUF_LIBRARIES ${PROTOBUF_STATIC_LIB})
+    set(PROTOBUF_COMPILER "${PROTOBUF_PREFIX}/bin/protoc")
+
+    # The zlib root handed to protobuf is the parent of zlib's include directory.
+    get_target_property(THIRDPARTY_ZLIB_INCLUDE_DIR zlib INTERFACE_INCLUDE_DIRECTORIES)
+    get_filename_component(THIRDPARTY_ZLIB_ROOT "${THIRDPARTY_ZLIB_INCLUDE_DIR}"
+                           DIRECTORY)
+
+    # Strip lto flags (which may be added by dh_auto_configure)
+    # See https://github.com/protocolbuffers/protobuf/issues/7092
+    set(PROTOBUF_C_FLAGS "${EP_C_FLAGS}")
+    set(PROTOBUF_CXX_FLAGS "${EP_CXX_FLAGS}")
+    string(REPLACE "-flto=auto" "" PROTOBUF_C_FLAGS "${PROTOBUF_C_FLAGS}")
+    string(REPLACE "-ffat-lto-objects" "" PROTOBUF_C_FLAGS "${PROTOBUF_C_FLAGS}")
+    string(REPLACE "-flto=auto" "" PROTOBUF_CXX_FLAGS "${PROTOBUF_CXX_FLAGS}")
+    string(REPLACE "-ffat-lto-objects" "" PROTOBUF_CXX_FLAGS "${PROTOBUF_CXX_FLAGS}")
+    # Silence two warning classes for the protobuf sources.
+    string(APPEND PROTOBUF_CXX_FLAGS
+           " -Wno-inconsistent-missing-override -Wno-unneeded-internal-declaration")
+    set(PROTOBUF_CMAKE_ARGS
+        ${EP_COMMON_CMAKE_ARGS}
+        -DCMAKE_INSTALL_PREFIX=${PROTOBUF_PREFIX}
+        "-DCMAKE_CXX_FLAGS=${PROTOBUF_CXX_FLAGS}"
+        "-DCMAKE_C_FLAGS=${PROTOBUF_C_FLAGS}"
+        "-DZLIB_ROOT=${THIRDPARTY_ZLIB_ROOT}"
+        -Dprotobuf_BUILD_TESTS=OFF
+        -Dprotobuf_DEBUG_POSTFIX=)
+    # The CMakeLists.txt used for the build lives in the `cmake` subdirectory.
+    set(PROTOBUF_CONFIGURE SOURCE_SUBDIR "cmake" CMAKE_ARGS ${PROTOBUF_CMAKE_ARGS})
+
+    externalproject_add(protobuf_ep
+                        URL ${PROTOBUF_SOURCE_URL}
+                        URL_HASH "SHA256=${PAIMON_PROTOBUF_BUILD_SHA256_CHECKSUM}"
+                        ${PROTOBUF_CONFIGURE} ${THIRDPARTY_LOG_OPTIONS}
+                        BUILD_BYPRODUCTS "${PROTOBUF_STATIC_LIB}"
+                                         "${PROTOC_STATIC_LIB}"
+                                         "${PROTOBUF_COMPILER}"
+                        DEPENDS zlib)
+
+    # The include directory must exist before it is referenced by a target.
+    file(MAKE_DIRECTORY "${PROTOBUF_INCLUDE_DIR}")
+
+    include_directories(SYSTEM ${PROTOBUF_INCLUDE_DIR})
+    add_library(libprotobuf STATIC IMPORTED)
+    set_target_properties(libprotobuf PROPERTIES IMPORTED_LOCATION ${PROTOBUF_STATIC_LIB})
+    target_include_directories(libprotobuf INTERFACE ${PROTOBUF_INCLUDE_DIR})
+    add_library(libprotoc STATIC IMPORTED)
+    set_target_properties(libprotoc PROPERTIES IMPORTED_LOCATION ${PROTOC_STATIC_LIB})
+    target_include_directories(libprotoc INTERFACE ${PROTOBUF_INCLUDE_DIR})
+
+    add_executable(protoc IMPORTED)
+    set_target_properties(protoc PROPERTIES IMPORTED_LOCATION ${PROTOBUF_COMPILER})
+
+    # Every imported artifact comes out of protobuf_ep, so each target must be
+    # ordered after it (libprotoc included).
+    add_dependencies(libprotobuf protobuf_ep)
+    add_dependencies(libprotoc protobuf_ep)
+    add_dependencies(protoc protobuf_ep)
+endmacro()
+
+# Builds the Avro C++ static library (from `lang/c++` in the Avro source tree)
+# against the in-tree fmt, zlib, zstd and snappy, and exposes it as the `avro`
+# STATIC IMPORTED target.
+macro(build_avro)
+    message(STATUS "Building avro from source")
+    set(AVRO_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/avro_ep-install")
+    set(AVRO_INCLUDE_DIR "${AVRO_PREFIX}/include")
+    set(AVRO_STATIC_LIB_NAME avrocpp_s)
+    set(AVRO_STATIC_LIB
+        "${AVRO_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}${AVRO_STATIC_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}"
+    )
+    set(AVRO_LIBRARIES ${AVRO_STATIC_LIB})
+    # The include directory must exist before it is referenced by a target.
+    file(MAKE_DIRECTORY "${AVRO_INCLUDE_DIR}")
+
+    # For each dependency, derive AVRO_<DEP>_INCLUDE_DIR from the target's
+    # interface include directories and AVRO_<DEP>_ROOT as its parent directory.
+    foreach(_avro_dep snappy zstd zlib fmt)
+        string(TOUPPER "${_avro_dep}" _avro_dep_upper)
+        get_target_property(AVRO_${_avro_dep_upper}_INCLUDE_DIR ${_avro_dep}
+                            INTERFACE_INCLUDE_DIRECTORIES)
+        get_filename_component(AVRO_${_avro_dep_upper}_ROOT
+                               "${AVRO_${_avro_dep_upper}_INCLUDE_DIR}" DIRECTORY)
+    endforeach()
+    unset(_avro_dep_upper)
+
+    # Compiler warnings must not fail the third-party build.
+    set(AVRO_CMAKE_CXX_FLAGS "${EP_CXX_FLAGS} -Wno-error")
+    set(AVRO_CMAKE_C_FLAGS "${EP_C_FLAGS} -Wno-error")
+
+    set(AVRO_CMAKE_ARGS
+        ${EP_COMMON_CMAKE_ARGS}
+        "-DCMAKE_INSTALL_PREFIX=${AVRO_PREFIX}"
+        "-DCMAKE_CXX_FLAGS=${AVRO_CMAKE_CXX_FLAGS}"
+        "-DCMAKE_C_FLAGS=${AVRO_CMAKE_C_FLAGS}"
+        "-DAVRO_BUILD_TESTS=OFF"
+        "-DAVRO_BUILD_EXECUTABLES=OFF"
+        "-DZLIB_ROOT=${AVRO_ZLIB_ROOT}"
+        "-Dfmt_ROOT=${AVRO_FMT_ROOT}"
+        "-Dzstd_ROOT=${AVRO_ZSTD_ROOT}"
+        "-DSnappy_ROOT=${AVRO_SNAPPY_ROOT}")
+    externalproject_add(avro_ep
+                        URL ${AVRO_SOURCE_URL}
+                        URL_HASH "SHA256=${PAIMON_AVRO_BUILD_SHA256_CHECKSUM}"
+                        DEPENDS fmt zlib zstd snappy
+                        SOURCE_SUBDIR "lang/c++"
+                        CMAKE_ARGS ${AVRO_CMAKE_ARGS}
+                        BUILD_BYPRODUCTS "${AVRO_STATIC_LIB}")
+
+    add_library(avro STATIC IMPORTED)
+    set_target_properties(avro
+                          PROPERTIES IMPORTED_LOCATION ${AVRO_STATIC_LIB}
+                                     INTERFACE_INCLUDE_DIRECTORIES ${AVRO_INCLUDE_DIR})
+    target_link_libraries(avro
+                          INTERFACE zlib
+                                    zstd
+                                    snappy)
+    add_dependencies(avro avro_ep)
+    include_directories(SYSTEM ${AVRO_INCLUDE_DIR})
+endmacro()
+
+# Builds the ORC C++ static library (patched with orc.diff) against the in-tree
+# snappy, lz4, zstd, zlib and protobuf, and exposes it as the `orc::orc` STATIC
+# IMPORTED target.
+#
+# Reads:  EP_CXX_FLAGS, EP_C_FLAGS, EP_COMMON_CMAKE_ARGS, UPPERCASE_BUILD_TYPE,
+#         CMAKE_CXX_FLAGS_<CONFIG>, CMAKE_C_FLAGS_<CONFIG>, ORC_SOURCE_URL,
+#         PAIMON_ORC_BUILD_SHA256_CHECKSUM, the PAIMON_RPATH global property and
+#         the snappy / lz4 / zstd / zlib / libprotobuf targets.
+# Sets (in the caller's scope, since this is a macro): the ORC_* variables,
+#         PAIMON_RPATH and PATCH_FILE.
+# NOTE: this macro also rewrites EP_CXX_FLAGS in the caller's scope (every
+#       "-Werror" occurrence is removed), which affects code that runs later.
+macro(build_orc)
+    message(STATUS "Building orc from source")
+
+    # For each dependency: read the include directory back from its imported
+    # target and take the parent directory as that dependency's install root.
+    get_target_property(ORC_SNAPPY_INCLUDE_DIR snappy INTERFACE_INCLUDE_DIRECTORIES)
+    get_filename_component(ORC_SNAPPY_ROOT "${ORC_SNAPPY_INCLUDE_DIR}" DIRECTORY)
+
+    get_target_property(ORC_LZ4_INCLUDE_DIR lz4 INTERFACE_INCLUDE_DIRECTORIES)
+    get_filename_component(ORC_LZ4_ROOT "${ORC_LZ4_INCLUDE_DIR}" DIRECTORY)
+
+    get_target_property(ORC_ZSTD_INCLUDE_DIR zstd INTERFACE_INCLUDE_DIRECTORIES)
+    get_filename_component(ORC_ZSTD_ROOT "${ORC_ZSTD_INCLUDE_DIR}" DIRECTORY)
+
+    get_target_property(ORC_ZLIB_INCLUDE_DIR zlib INTERFACE_INCLUDE_DIRECTORIES)
+    get_filename_component(ORC_ZLIB_ROOT "${ORC_ZLIB_INCLUDE_DIR}" DIRECTORY)
+
+    get_target_property(ORC_PROTOBUF_INCLUDE_DIR libprotobuf
+                        INTERFACE_INCLUDE_DIRECTORIES)
+    get_filename_component(ORC_PROTOBUF_ROOT "${ORC_PROTOBUF_INCLUDE_DIR}" DIRECTORY)
+
+    get_property(PAIMON_RPATH GLOBAL PROPERTY PAIMON_RPATH)
+    message(STATUS "PAIMON_RPATH value: ${PAIMON_RPATH}")
+    set(ORC_RPATH ${PAIMON_RPATH})
+    message(STATUS "ORC_RPATH value: ${ORC_RPATH}")
+
+    # Quoted input: with an empty EP_CXX_FLAGS the unquoted form would leave
+    # string(REPLACE) with too few arguments and abort the configure step.
+    string(REPLACE "-Werror" "" EP_CXX_FLAGS "${EP_CXX_FLAGS}")
+
+    # Each language gets its own per-configuration flags appended; the C flags
+    # use CMAKE_C_FLAGS_<CONFIG>, not the C++ ones.
+    set(ORC_CMAKE_CXX_FLAGS
+        "${EP_CXX_FLAGS} -fPIC -Wno-error ${CMAKE_CXX_FLAGS_${UPPERCASE_BUILD_TYPE}}")
+    set(ORC_CMAKE_C_FLAGS
+        "${EP_C_FLAGS} -fPIC -Wno-error ${CMAKE_C_FLAGS_${UPPERCASE_BUILD_TYPE}}")
+
+    set(ORC_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/orc_ep-prefix")
+    set(ORC_INCLUDE_DIR "${ORC_PREFIX}/include")
+    set(ORC_SOURCE_DIR "${ORC_PREFIX}/cpp")
+    set(ORC_BUILD_DIR "${CMAKE_BINARY_DIR}/build/orc")
+
+    set(ORC_STATIC_LIB "${ORC_PREFIX}/lib/liborc.a")
+
+    message("ORC_STATIC_LIB IS ${ORC_STATIC_LIB}")
+    message("ORC_CMAKE_CXX_FLAGS ${ORC_CMAKE_CXX_FLAGS}")
+    message("ORC_CMAKE_C_FLAGS ${ORC_CMAKE_C_FLAGS}")
+
+    set(ORC_CMAKE_ARGS
+        ${EP_COMMON_CMAKE_ARGS}
+        "-DCMAKE_INSTALL_PREFIX=${ORC_PREFIX}"
+        "-DCMAKE_CXX_FLAGS=${ORC_CMAKE_CXX_FLAGS}"
+        "-DCMAKE_C_FLAGS=${ORC_CMAKE_C_FLAGS}"
+        "-DCMAKE_CXX_FLAGS_${UPPERCASE_BUILD_TYPE}=${ORC_CMAKE_CXX_FLAGS}"
+        "-DCMAKE_EXE_LINKER_FLAGS=-Wl,-rpath=${ORC_RPATH}"
+        "-DCMAKE_SHARED_LINKER_FLAGS=-Wl,-rpath=${ORC_RPATH}"
+        "-DCMAKE_MODULE_LINKER_FLAGS=-Wl,-rpath=${ORC_RPATH}"
+        "-DSNAPPY_HOME=${ORC_SNAPPY_ROOT}"
+        "-DLZ4_HOME=${ORC_LZ4_ROOT}"
+        "-DZSTD_HOME=${ORC_ZSTD_ROOT}"
+        "-DZLIB_HOME=${ORC_ZLIB_ROOT}"
+        "-DPROTOBUF_HOME=${ORC_PROTOBUF_ROOT}"
+        "-DProtobuf_ROOT=${ORC_PROTOBUF_ROOT}"
+        -DBUILD_JAVA=OFF
+        -DBUILD_CPP_TESTS=OFF
+        -DBUILD_TOOLS=OFF
+        -DBUILD_CPP_ENABLE_METRICS=ON)
+
+    # The patch step drops a `.patched` marker inside the source tree so that
+    # re-running the step does not try to apply orc.diff a second time.
+    set(PATCH_FILE "${CMAKE_CURRENT_LIST_DIR}/orc.diff")
+    externalproject_add(orc_ep
+                        URL ${ORC_SOURCE_URL}
+                        URL_HASH "SHA256=${PAIMON_ORC_BUILD_SHA256_CHECKSUM}"
+                        SOURCE_DIR ${ORC_SOURCE_DIR}
+                        BINARY_DIR ${ORC_BUILD_DIR}
+                        CMAKE_ARGS ${ORC_CMAKE_ARGS}
+                        LOG_PATCH ON
+                        PATCH_COMMAND ${CMAKE_COMMAND} -E chdir <SOURCE_DIR> bash -c
+                                      "[ -f .patched ] && echo '<SOURCE_DIR> patch already applied, ignore...' || patch -s -N -p1 -i '${PATCH_FILE}' && touch .patched"
+                        UPDATE_DISCONNECTED 1
+                        BUILD_BYPRODUCTS ${ORC_STATIC_LIB}
+                        DEPENDS zstd
+                                snappy
+                                lz4
+                                zlib
+                                libprotobuf)
+
+    # The include directory must exist before it is referenced by a target.
+    file(MAKE_DIRECTORY "${ORC_INCLUDE_DIR}")
+
+    add_library(orc::orc STATIC IMPORTED)
+    set_target_properties(orc::orc
+                          PROPERTIES IMPORTED_LOCATION "${ORC_STATIC_LIB}"
+                                     INTERFACE_INCLUDE_DIRECTORIES "${ORC_INCLUDE_DIR}")
+    target_link_libraries(orc::orc
+                          INTERFACE zstd
+                                    snappy
+                                    lz4
+                                    zlib
+                                    libprotobuf)
+
+    add_dependencies(orc::orc orc_ep)
+endmacro()
+
+# Build Apache Arrow (core, Acero, Dataset and Parquet) as an ExternalProject
+# and expose the installed static archives as the IMPORTED targets arrow,
+# arrow_acero, arrow_dataset, parquet and arrow_bundled_dependencies.
+#
+# Prerequisites: the targets snappy, lz4, zstd, zlib and re2::re2 must already
+# exist; their INTERFACE_INCLUDE_DIRECTORIES are used to derive the *_ROOT
+# hints passed to Arrow's configure step.
+#
+# This is a macro, so every variable set here stays visible to the caller.
+macro(build_arrow)
+    message(STATUS "Building Arrow from source")
+
+    # Each dependency root is taken to be the parent of its include directory.
+    get_target_property(ARROW_SNAPPY_INCLUDE_DIR snappy INTERFACE_INCLUDE_DIRECTORIES)
+    get_filename_component(ARROW_SNAPPY_ROOT "${ARROW_SNAPPY_INCLUDE_DIR}" DIRECTORY)
+
+    get_target_property(ARROW_LZ4_INCLUDE_DIR lz4 INTERFACE_INCLUDE_DIRECTORIES)
+    get_filename_component(ARROW_LZ4_ROOT "${ARROW_LZ4_INCLUDE_DIR}" DIRECTORY)
+
+    get_target_property(ARROW_ZSTD_INCLUDE_DIR zstd INTERFACE_INCLUDE_DIRECTORIES)
+    get_filename_component(ARROW_ZSTD_ROOT "${ARROW_ZSTD_INCLUDE_DIR}" DIRECTORY)
+
+    get_target_property(ARROW_ZLIB_INCLUDE_DIR zlib INTERFACE_INCLUDE_DIRECTORIES)
+    get_filename_component(ARROW_ZLIB_ROOT "${ARROW_ZLIB_INCLUDE_DIR}" DIRECTORY)
+
+    get_target_property(ARROW_RE2_INCLUDE_DIR re2::re2 INTERFACE_INCLUDE_DIRECTORIES)
+    get_filename_component(ARROW_RE2_ROOT "${ARROW_RE2_INCLUDE_DIR}" DIRECTORY)
+
+    # Warnings in third-party code must not fail the build.
+    set(ARROW_CMAKE_CXX_FLAGS "${EP_CXX_FLAGS} -Wno-error")
+    set(ARROW_CMAKE_C_FLAGS "${EP_C_FLAGS} -Wno-error")
+    # The input is quoted so that it is always passed as a single argument;
+    # unquoted, a value containing ';' would be split and re-joined without
+    # separators by string(REPLACE).
+    string(REPLACE "-Werror" "" ARROW_CMAKE_CXX_FLAGS "${ARROW_CMAKE_CXX_FLAGS}")
+    # Fix for thrift Mutex.h missing #include <cstdint> (GCC 15 strictness)
+    # Use -include to force include cstdint for all C++ files
+    string(APPEND ARROW_CMAKE_CXX_FLAGS " -include cstdint")
+
+    set(ARROW_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/arrow_ep-install")
+    set(ARROW_HOME "${ARROW_PREFIX}")
+    set(ARROW_SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/arrow_ep-prefix/src/arrow_ep")
+
+    set(_ARROW_LIBRARY_SUFFIX "${CMAKE_STATIC_LIBRARY_SUFFIX}")
+    set(ARROW_INCLUDE_DIR "${ARROW_PREFIX}/include")
+
+    # The include directory must exist before it is referenced by a target.
+    file(MAKE_DIRECTORY "${ARROW_INCLUDE_DIR}")
+
+    set(ARROW_BUILD_DIR "${CMAKE_BINARY_DIR}/arrow")
+
+    # Installed archive paths. They are used both as BUILD_BYPRODUCTS of the
+    # external project and as IMPORTED_LOCATION of the imported targets.
+    set(ARROW_STATIC_LIB
+        "${ARROW_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}arrow${CMAKE_STATIC_LIBRARY_SUFFIX}"
+    )
+    set(ARROW_DATASET_STATIC_LIB
+        "${ARROW_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}arrow_dataset${CMAKE_STATIC_LIBRARY_SUFFIX}"
+    )
+    set(ARROW_ACERO_STATIC_LIB
+        "${ARROW_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}arrow_acero${CMAKE_STATIC_LIBRARY_SUFFIX}"
+    )
+    set(ARROW_BUNDLED_DEP_STATIC_LIB
+        "${ARROW_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}arrow_bundled_dependencies${CMAKE_STATIC_LIBRARY_SUFFIX}"
+    )
+    set(PARQUET_STATIC_LIB
+        "${ARROW_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}parquet${CMAKE_STATIC_LIBRARY_SUFFIX}"
+    )
+
+    set(ARROW_CMAKE_ARGS
+        ${EP_COMMON_CMAKE_ARGS}
+        "-DCMAKE_INSTALL_PREFIX=${ARROW_PREFIX}"
+        "-DCMAKE_CXX_FLAGS=${ARROW_CMAKE_CXX_FLAGS}"
+        "-DCMAKE_C_FLAGS=${ARROW_CMAKE_C_FLAGS}"
+        "-DCMAKE_CXX_FLAGS_${UPPERCASE_BUILD_TYPE}=${ARROW_CMAKE_CXX_FLAGS}"
+        -DARROW_DEPENDENCY_USE_SHARED=OFF
+        -DARROW_BUILD_SHARED=OFF
+        -DARROW_BUILD_STATIC=ON
+        -DARROW_BUILD_TESTS=OFF
+        -DARROW_BUILD_BENCHMARKS=OFF
+        -DARROW_BUILD_EXAMPLES=OFF
+        -DARROW_JEMALLOC=OFF
+        -DARROW_WITH_RE2=ON
+        -DARROW_WITH_UTF8PROC=OFF
+        -DARROW_ORC=OFF
+        -DARROW_SIMD_LEVEL=NONE
+        -DARROW_RUNTIME_SIMD_LEVEL=NONE
+        -DARROW_PARQUET=ON
+        -DARROW_IPC=ON
+        -DARROW_DATASET=ON
+        -DARROW_JSON=ON
+        -DARROW_COMPUTE=ON
+        -DARROW_WITH_SNAPPY=ON
+        -DARROW_WITH_ZLIB=ON
+        -DARROW_WITH_LZ4=ON
+        -DARROW_WITH_ZSTD=ON
+        -DARROW_WITH_BZ2=OFF
+        -DARROW_WITH_BROTLI=ON
+        -DZSTD_ROOT=${ARROW_ZSTD_ROOT}
+        -DZLIB_ROOT=${ARROW_ZLIB_ROOT}
+        -DSnappy_ROOT=${ARROW_SNAPPY_ROOT}
+        -DLZ4_ROOT=${ARROW_LZ4_ROOT}
+        -Dre2_ROOT=${ARROW_RE2_ROOT}
+        -DBUILD_WARNING_LEVEL=PRODUCTION) # ignore warnings under gcc8
+
+    # The CMake project lives in the "cpp" subdirectory of the source archive.
+    set(ARROW_CONFIGURE SOURCE_SUBDIR "cpp" CMAKE_ARGS ${ARROW_CMAKE_ARGS})
+    set(PATCH_FILE "${CMAKE_CURRENT_LIST_DIR}/arrow.diff")
+    # The patch step drops a ".patched" marker file in the source directory so
+    # that the patch is not applied a second time when the step is re-run.
+    externalproject_add(arrow_ep
+                        URL ${ARROW_SOURCE_URL}
+                        URL_HASH "SHA256=${PAIMON_ARROW_BUILD_SHA256_CHECKSUM}"
+                        LOG_PATCH ON
+                        PATCH_COMMAND ${CMAKE_COMMAND} -E chdir <SOURCE_DIR> bash -c
+                                      "[ -f .patched ] && echo '<SOURCE_DIR> patch already applied, ignore...' || patch -s -N -p1 -i '${PATCH_FILE}' && touch .patched"
+                        GIT_SUBMODULES ""
+                        GIT_SUBMODULES_RECURSE FALSE
+                        ${ARROW_CONFIGURE}
+                        UPDATE_DISCONNECTED 1
+                        BUILD_BYPRODUCTS "${ARROW_STATIC_LIB}"
+                                         "${ARROW_BUNDLED_DEP_STATIC_LIB}"
+                                         "${PARQUET_STATIC_LIB}"
+                                         "${ARROW_DATASET_STATIC_LIB}"
+                                         "${ARROW_ACERO_STATIC_LIB}"
+                        DEPENDS zstd
+                                snappy
+                                lz4
+                                zlib
+                                re2::re2)
+
+    # IMPORTED_LOCATION reuses the *_STATIC_LIB paths declared as
+    # BUILD_BYPRODUCTS above, so both always name the same file regardless of
+    # the platform's static library prefix and suffix.
+    add_library(arrow STATIC IMPORTED)
+    set_target_properties(arrow
+                          PROPERTIES IMPORTED_LOCATION "${ARROW_STATIC_LIB}"
+                                     INTERFACE_INCLUDE_DIRECTORIES "${ARROW_INCLUDE_DIR}"
+                                     INTERFACE_LINK_DIRECTORIES
+                                     "${ARROW_BUILD_DIR}/${LOWERCASE_BUILD_TYPE}")
+
+    add_library(arrow_dataset STATIC IMPORTED)
+    set_target_properties(arrow_dataset
+                          PROPERTIES IMPORTED_LOCATION "${ARROW_DATASET_STATIC_LIB}"
+                                     INTERFACE_INCLUDE_DIRECTORIES "${ARROW_INCLUDE_DIR}"
+                                     INTERFACE_LINK_DIRECTORIES
+                                     "${ARROW_BUILD_DIR}/${LOWERCASE_BUILD_TYPE}")
+
+    add_library(arrow_acero STATIC IMPORTED)
+    set_target_properties(arrow_acero
+                          PROPERTIES IMPORTED_LOCATION "${ARROW_ACERO_STATIC_LIB}"
+                                     INTERFACE_INCLUDE_DIRECTORIES "${ARROW_INCLUDE_DIR}"
+                                     INTERFACE_LINK_DIRECTORIES
+                                     "${ARROW_BUILD_DIR}/${LOWERCASE_BUILD_TYPE}")
+
+    add_library(parquet STATIC IMPORTED)
+    set_target_properties(parquet
+                          PROPERTIES IMPORTED_LOCATION "${PARQUET_STATIC_LIB}"
+                                     INTERFACE_INCLUDE_DIRECTORIES "${ARROW_INCLUDE_DIR}"
+                                     INTERFACE_LINK_DIRECTORIES
+                                     "${ARROW_BUILD_DIR}/${LOWERCASE_BUILD_TYPE}")
+
+    add_library(arrow_bundled_dependencies STATIC IMPORTED)
+    set_target_properties(arrow_bundled_dependencies
+                          PROPERTIES IMPORTED_LOCATION "${ARROW_BUNDLED_DEP_STATIC_LIB}"
+                                     INTERFACE_INCLUDE_DIRECTORIES "${ARROW_INCLUDE_DIR}"
+                                     INTERFACE_LINK_DIRECTORIES
+                                     "${ARROW_BUILD_DIR}/${LOWERCASE_BUILD_TYPE}")
+
+    # Make sure the archives are built before anything that links to them.
+    add_dependencies(arrow arrow_ep)
+    add_dependencies(parquet arrow_ep)
+    add_dependencies(arrow_bundled_dependencies arrow_ep)
+    add_dependencies(arrow_dataset arrow_ep)
+    add_dependencies(arrow_acero arrow_ep)
+
+    # Link chain: arrow_dataset -> arrow_acero -> arrow -> compression/re2
+    # libraries and the bundled dependencies archive.
+    target_link_libraries(arrow_acero INTERFACE arrow)
+
+    target_link_libraries(arrow_dataset INTERFACE arrow_acero)
+
+    target_link_libraries(arrow
+                          INTERFACE zstd
+                                    snappy
+                                    lz4
+                                    zlib
+                                    re2::re2
+                                    arrow_bundled_dependencies)
+
+    target_link_libraries(parquet
+                          INTERFACE zstd
+                                    snappy
+                                    lz4
+                                    zlib
+                                    arrow_bundled_dependencies
+                                    arrow_dataset)
+
+endmacro()
+
+# Build GoogleTest and GoogleMock as an ExternalProject and expose the
+# installed static archives as the IMPORTED targets GTest::gtest,
+# GTest::gtest_main and GTest::gmock. Also sets GTEST_LINK_TOOLCHAIN to the
+# list of targets a test executable links against.
+#
+# This is a macro, so every variable set here stays visible to the caller.
+macro(build_gtest)
+    message(STATUS "Building gtest from source")
+
+    # Warnings in third-party code must not fail the build. The input of
+    # string(REPLACE) is quoted so it is always passed as a single argument.
+    set(GTEST_CMAKE_CXX_FLAGS "${EP_CXX_FLAGS} -Wno-error")
+    string(REPLACE "-Werror" "" GTEST_CMAKE_CXX_FLAGS "${GTEST_CMAKE_CXX_FLAGS}")
+
+    set(GTEST_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/googletest_ep-install")
+    set(GTEST_INCLUDE_DIR "${GTEST_PREFIX}/include")
+
+    set(_GTEST_RUNTIME_DIR ${BUILD_OUTPUT_ROOT_DIRECTORY})
+
+    # Library and runtime same on non-Windows
+    set(_GTEST_LIBRARY_DIR "${_GTEST_RUNTIME_DIR}")
+
+    # Debug builds are expected to install archives with a "d" postfix
+    # (gtestd, gmockd, gtest_maind).
+    if(LOWERCASE_BUILD_TYPE STREQUAL "debug")
+        set(_GTEST_DEBUG_POSTFIX "d")
+    else()
+        set(_GTEST_DEBUG_POSTFIX "")
+    endif()
+    set(GTEST_STATIC_LIB
+        "${GTEST_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}gtest${_GTEST_DEBUG_POSTFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}"
+    )
+    set(GMOCK_STATIC_LIB
+        "${GTEST_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}gmock${_GTEST_DEBUG_POSTFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}"
+    )
+    set(GTEST_MAIN_STATIC_LIB
+        "${GTEST_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}gtest_main${_GTEST_DEBUG_POSTFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}"
+    )
+
+    # Per-configuration variables are spelled with the UPPER-CASE
+    # configuration name (CMAKE_RUNTIME_OUTPUT_DIRECTORY_<CONFIG>), hence
+    # UPPERCASE_BUILD_TYPE rather than CMAKE_BUILD_TYPE in the last entry.
+    set(GTEST_CMAKE_ARGS
+        ${EP_COMMON_CMAKE_ARGS}
+        "-DCMAKE_INSTALL_PREFIX=${GTEST_PREFIX}"
+        "-DCMAKE_CXX_FLAGS=${GTEST_CMAKE_CXX_FLAGS}"
+        "-DCMAKE_CXX_FLAGS_${UPPERCASE_BUILD_TYPE}=${GTEST_CMAKE_CXX_FLAGS}"
+        "-DCMAKE_ARCHIVE_OUTPUT_DIRECTORY=${_GTEST_RUNTIME_DIR}"
+        "-DCMAKE_RUNTIME_OUTPUT_DIRECTORY_${UPPERCASE_BUILD_TYPE}=${_GTEST_RUNTIME_DIR}")
+
+    externalproject_add(googletest_ep
+                        URL ${GTEST_SOURCE_URL}
+                        URL_HASH "SHA256=${PAIMON_GTEST_BUILD_SHA256_CHECKSUM}"
+                        CMAKE_ARGS ${GTEST_CMAKE_ARGS}
+                        BUILD_BYPRODUCTS "${GTEST_STATIC_LIB}"
+                                         "${GTEST_MAIN_STATIC_LIB}"
+                                         "${GMOCK_STATIC_LIB}")
+
+    # The include directory must exist before it is referenced by a target.
+    file(MAKE_DIRECTORY "${GTEST_INCLUDE_DIR}")
+
+    find_package(Threads REQUIRED)
+
+    add_library(GTest::gtest STATIC IMPORTED)
+    set_target_properties(GTest::gtest
+                          PROPERTIES IMPORTED_LOCATION "${GTEST_STATIC_LIB}"
+                                     INTERFACE_INCLUDE_DIRECTORIES "${GTEST_INCLUDE_DIR}")
+
+    add_library(GTest::gtest_main STATIC IMPORTED)
+    set_target_properties(GTest::gtest_main
+                          PROPERTIES IMPORTED_LOCATION "${GTEST_MAIN_STATIC_LIB}"
+                                     INTERFACE_INCLUDE_DIRECTORIES "${GTEST_INCLUDE_DIR}")
+
+    add_library(GTest::gmock STATIC IMPORTED)
+    set_target_properties(GTest::gmock
+                          PROPERTIES IMPORTED_LOCATION "${GMOCK_STATIC_LIB}"
+                                     INTERFACE_INCLUDE_DIRECTORIES "${GTEST_INCLUDE_DIR}")
+
+    # Record the dependencies between the archives so that the link line is
+    # ordered correctly even when a consumer names only one of the targets.
+    target_link_libraries(GTest::gtest INTERFACE Threads::Threads)
+    target_link_libraries(GTest::gtest_main INTERFACE GTest::gtest)
+    target_link_libraries(GTest::gmock INTERFACE GTest::gtest)
+
+    # Make sure the archives are built before anything that links to them.
+    add_dependencies(GTest::gtest googletest_ep)
+    add_dependencies(GTest::gtest_main googletest_ep)
+    add_dependencies(GTest::gmock googletest_ep)
+
+    set(GTEST_LINK_TOOLCHAIN GTest::gtest_main GTest::gtest GTest::gmock Threads::Threads)
+endmacro()
+
+# Build TBB as an ExternalProject and expose the installed static archive as
+# the IMPORTED target tbb.
+#
+# This is a macro, so every variable set here stays visible to the caller.
+macro(build_tbb)
+    message(STATUS "Building Tbb from source")
+
+    # Warnings in third-party code must not fail the build. The inputs of
+    # string(REPLACE) are quoted so that each is always passed as a single
+    # argument; unquoted, a value containing ';' would be split and re-joined
+    # without separators.
+    set(TBB_CMAKE_CXX_FLAGS "${EP_CXX_FLAGS} -Wno-error")
+    set(TBB_CMAKE_C_FLAGS "${EP_C_FLAGS} -Wno-error")
+    string(REPLACE "-Werror" "" TBB_CMAKE_CXX_FLAGS "${TBB_CMAKE_CXX_FLAGS}")
+
+    string(REPLACE "-Wdocumentation" "" TBB_CMAKE_CXX_FLAGS "${TBB_CMAKE_CXX_FLAGS}")
+    string(REPLACE "-Wdocumentation" "" TBB_CMAKE_C_FLAGS "${TBB_CMAKE_C_FLAGS}")
+
+    set(TBB_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/tbb_ep-install")
+
+    # Debug builds are expected to install the archive as tbb_debug.
+    if(LOWERCASE_BUILD_TYPE STREQUAL "debug")
+        set(TBB_STATIC_LIB
+            "${TBB_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}tbb_debug${CMAKE_STATIC_LIBRARY_SUFFIX}"
+        )
+    else()
+        set(TBB_STATIC_LIB
+            "${TBB_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}tbb${CMAKE_STATIC_LIBRARY_SUFFIX}"
+        )
+    endif()
+    set(TBB_INCLUDE_DIR "${TBB_PREFIX}/include")
+
+    # The include directory must exist before it is referenced by a target.
+    file(MAKE_DIRECTORY "${TBB_INCLUDE_DIR}")
+
+    set(TBB_BUILD_DIR "${CMAKE_BINARY_DIR}/tbb")
+
+    set(TBB_CMAKE_ARGS
+        ${EP_COMMON_CMAKE_ARGS}
+        "-DCMAKE_INSTALL_PREFIX=${TBB_PREFIX}"
+        "-DCMAKE_CXX_FLAGS=${TBB_CMAKE_CXX_FLAGS}"
+        "-DCMAKE_C_FLAGS=${TBB_CMAKE_C_FLAGS}"
+        "-DCMAKE_CXX_FLAGS_${UPPERCASE_BUILD_TYPE}=${TBB_CMAKE_CXX_FLAGS}"
+        -DTBB_TEST=OFF)
+
+    externalproject_add(tbb_ep
+                        URL ${TBB_SOURCE_URL}
+                        URL_HASH "SHA256=${PAIMON_TBB_BUILD_SHA256_CHECKSUM}"
+                        CMAKE_ARGS ${TBB_CMAKE_ARGS}
+                        BUILD_BYPRODUCTS "${TBB_STATIC_LIB}")
+
+    add_library(tbb STATIC IMPORTED)
+    set_target_properties(tbb
+                          PROPERTIES IMPORTED_LOCATION "${TBB_STATIC_LIB}"
+                                     INTERFACE_INCLUDE_DIRECTORIES "${TBB_INCLUDE_DIR}"
+                                     INTERFACE_LINK_DIRECTORIES
+                                     "${TBB_BUILD_DIR}/${LOWERCASE_BUILD_TYPE}")
+    # Make sure the archive is built before anything that links to it.
+    add_dependencies(tbb tbb_ep)
+
+endmacro()
+
+# Build glog as an ExternalProject and expose the installed static archive as
+# the IMPORTED target glog. If a library named "unwind" is found on the
+# system, it is added to the target's link interface.
+#
+# This is a macro, so every variable set here stays visible to the caller.
+macro(build_glog)
+    message(STATUS "Building glog from source")
+    set(GLOG_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/glog_ep-install")
+    set(GLOG_INCLUDE_DIR "${GLOG_PREFIX}/include")
+    # Debug builds are expected to install the archive with a "d" postfix.
+    # The variable is named rather than expanded with ${...}: the expanded
+    # form is a syntax error when the value is empty and is dereferenced a
+    # second time if a variable with the same name as the value exists.
+    if(UPPERCASE_BUILD_TYPE STREQUAL "DEBUG")
+        set(GLOG_LIB_SUFFIX "d")
+    else()
+        set(GLOG_LIB_SUFFIX "")
+    endif()
+    set(GLOG_STATIC_LIB
+        "${GLOG_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}glog${GLOG_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}"
+    )
+    # Warnings in third-party code must not fail the build.
+    set(GLOG_CMAKE_CXX_FLAGS " -Wno-error ${EP_CXX_FLAGS}")
+    set(GLOG_CMAKE_C_FLAGS " -Wno-error ${EP_C_FLAGS}")
+    if(CMAKE_THREAD_LIBS_INIT)
+        string(APPEND GLOG_CMAKE_CXX_FLAGS " ${CMAKE_THREAD_LIBS_INIT}")
+        string(APPEND GLOG_CMAKE_C_FLAGS " ${CMAKE_THREAD_LIBS_INIT}")
+    endif()
+
+    # Entries are quoted so that each -D definition stays a single list
+    # element even if a flag value contains ';'.
+    set(GLOG_CMAKE_ARGS
+        ${EP_COMMON_CMAKE_ARGS}
+        "-DCMAKE_INSTALL_PREFIX=${GLOG_PREFIX}"
+        -DWITH_GFLAGS=OFF
+        -DWITH_GTEST=OFF
+        "-DCMAKE_CXX_FLAGS=${GLOG_CMAKE_CXX_FLAGS}"
+        "-DCMAKE_C_FLAGS=${GLOG_CMAKE_C_FLAGS}")
+
+    externalproject_add(glog_ep
+                        URL ${GLOG_SOURCE_URL}
+                        URL_HASH "SHA256=${PAIMON_GLOG_BUILD_SHA256_CHECKSUM}"
+                        CMAKE_ARGS ${GLOG_CMAKE_ARGS}
+                        BUILD_BYPRODUCTS "${GLOG_STATIC_LIB}")
+
+    # The include directory must exist before it is referenced by a target.
+    file(MAKE_DIRECTORY "${GLOG_INCLUDE_DIR}")
+    add_library(glog STATIC IMPORTED)
+    # The link directory is the install prefix's lib directory. This macro
+    # never sets GLOG_BUILD_DIR, so "${GLOG_BUILD_DIR}/lib" would expand to
+    # the system directory "/lib".
+    set_target_properties(glog
+                          PROPERTIES IMPORTED_LOCATION "${GLOG_STATIC_LIB}"
+                                     INTERFACE_INCLUDE_DIRECTORIES "${GLOG_INCLUDE_DIR}"
+                                     INTERFACE_LINK_DIRECTORIES "${GLOG_PREFIX}/lib"
+                                     INTERFACE_COMPILE_DEFINITIONS "GLOG_USE_GLOG_EXPORT")
+
+    # Make sure the archive is built before anything that links to it.
+    add_dependencies(glog glog_ep)
+
+    # libunwind is optional: link it only when it is available.
+    find_library(LIBUNWIND_LIBRARY NAMES unwind)
+    if(LIBUNWIND_LIBRARY)
+        target_link_libraries(glog INTERFACE ${LIBUNWIND_LIBRARY})
+    endif()
+endmacro()
+
+# Dependencies that are resolved unconditionally.
+# NOTE(review): resolve_dependency(), paimon_apply_dependency_source_defaults()
+# and paimon_warn_if_mixed_arrow_dependencies() are defined outside this
+# section; their exact behavior should be confirmed there.
+resolve_dependency(fmt)
+resolve_dependency(RapidJSON)
+paimon_apply_dependency_source_defaults()
+# RE2, Snappy, zstd, ZLIB and LZ4 come before Arrow: build_arrow() reads the
+# re2::re2, snappy, zstd, zlib and lz4 targets and lists them in DEPENDS.
+resolve_dependency(RE2)
+resolve_dependency(Snappy)
+resolve_dependency(zstd)
+resolve_dependency(ZLIB)
+resolve_dependency(LZ4)
+resolve_dependency(Arrow)
+paimon_warn_if_mixed_arrow_dependencies()
+resolve_dependency(TBB)
+resolve_dependency(glog)
+
+# Optional components, each gated by its PAIMON_ENABLE_* switch.
+if(PAIMON_ENABLE_AVRO)
+    resolve_dependency(Avro)
+endif()
+if(PAIMON_ENABLE_ORC)
+    # Protobuf comes first: the ORC external project depends on libprotobuf.
+    resolve_dependency(Protobuf)
+    resolve_dependency(ORC)
+endif()
+if(PAIMON_ENABLE_JINDO)
+    build_jindosdk_c()
+    build_jindosdk_nextarch()
+endif()
+if(PAIMON_ENABLE_LUCENE)
+    build_boost()
+    build_lucene()
+    build_jieba()
+endif()
diff --git a/cmake_modules/arrow.diff b/cmake_modules/arrow.diff
new file mode 100644
index 0000000..e539d1f
--- /dev/null
+++ b/cmake_modules/arrow.diff
@@ -0,0 +1,213 @@
+diff --git a/cpp/src/parquet/arrow/schema.cc b/cpp/src/parquet/arrow/schema.cc
+index ec3890a41f..943f69bb6c 100644
+--- a/cpp/src/parquet/arrow/schema.cc
++++ b/cpp/src/parquet/arrow/schema.cc
+@@ -178,7 +178,7 @@ static Status GetTimestampMetadata(const 
::arrow::TimestampType& type,
+
+   // The user is explicitly asking for Impala int96 encoding, there is no
+   // logical type.
+-  if (arrow_properties.support_deprecated_int96_timestamps()) {
++  if (arrow_properties.support_deprecated_int96_timestamps() && target_unit 
== ::arrow::TimeUnit::NANO) {
+     *physical_type = ParquetType::INT96;
+     return Status::OK();
+   }
+
+diff --git a/cpp/src/parquet/arrow/reader.cc b/cpp/src/parquet/arrow/reader.cc
+index 285e2a5973..aa6f92f077 100644
+--- a/cpp/src/parquet/arrow/reader.cc
++++ b/cpp/src/parquet/arrow/reader.cc
+@@ -1013,25 +1013,32 @@ Status FileReaderImpl::GetRecordBatchReader(const 
std::vector<int>& row_groups,
+     return Status::OK();
+   }
+
+-  int64_t num_rows = 0;
++  std::vector<int64_t> num_rows;
+   for (int row_group : row_groups) {
+-    num_rows += parquet_reader()->metadata()->RowGroup(row_group)->num_rows();
++    
num_rows.push_back(parquet_reader()->metadata()->RowGroup(row_group)->num_rows());
+   }
+
+   using ::arrow::RecordBatchIterator;
++  int row_group_idx = 0;
+
+   // NB: This lambda will be invoked outside the scope of this call to
+   // `GetRecordBatchReader()`, so it must capture `readers` and 
`batch_schema` by value.
+   // `this` is a non-owning pointer so we are relying on the parent 
FileReader outliving
+   // this RecordBatchReader.
+   ::arrow::Iterator<RecordBatchIterator> batches = 
::arrow::MakeFunctionIterator(
+-      [readers, batch_schema, num_rows,
++      [readers, batch_schema, num_rows, row_group_idx,
+        this]() mutable -> ::arrow::Result<RecordBatchIterator> {
+         ::arrow::ChunkedArrayVector columns(readers.size());
+
+-        // don't reserve more rows than necessary
+-        int64_t batch_size = std::min(properties().batch_size(), num_rows);
+-        num_rows -= batch_size;
++        int64_t batch_size = 0;
++        if (!num_rows.empty()) {
++          // don't reserve more rows than necessary
++          batch_size = std::min(properties().batch_size(), 
num_rows[row_group_idx]);
++          num_rows[row_group_idx] -= batch_size;
++          if (num_rows[row_group_idx] == 0 && (num_rows.size() - 1) != 
row_group_idx) {
++            row_group_idx++;
++          }
++        }
+
+         RETURN_NOT_OK(::arrow::internal::OptionalParallelFor(
+             reader_properties_.use_threads(), 
static_cast<int>(readers.size()),
+diff --git a/cpp/src/parquet/arrow/writer.cc b/cpp/src/parquet/arrow/writer.cc
+index 4fd7ef1b47..87326a54f1 100644
+--- a/cpp/src/parquet/arrow/writer.cc
++++ b/cpp/src/parquet/arrow/writer.cc
+@@ -314,6 +314,14 @@ class FileWriterImpl : public FileWriter {
+     return Status::OK();
+   }
+
++  int64_t GetBufferedSize() override {
++    if (row_group_writer_ == nullptr) {
++      return 0;
++    }
++    return row_group_writer_->total_compressed_bytes() +
++      row_group_writer_->total_compressed_bytes_written();
++  }
++
+   Status Close() override {
+     if (!closed_) {
+       // Make idempotent
+@@ -418,10 +426,13 @@ class FileWriterImpl : public FileWriter {
+
+     // Max number of rows allowed in a row group.
+     const int64_t max_row_group_length = 
this->properties().max_row_group_length();
++    const int64_t max_row_group_size = 
this->properties().max_row_group_size();
+
+     // Initialize a new buffered row group writer if necessary.
+     if (row_group_writer_ == nullptr || !row_group_writer_->buffered() ||
+-        row_group_writer_->num_rows() >= max_row_group_length) {
++        row_group_writer_->num_rows() >= max_row_group_length ||
++        (row_group_writer_->total_compressed_bytes_written() +
++         row_group_writer_->total_compressed_bytes() >= max_row_group_size)) {
+       RETURN_NOT_OK(NewBufferedRowGroup());
+     }
+
+diff --git a/cpp/src/parquet/arrow/writer.h b/cpp/src/parquet/arrow/writer.h
+index 4a1a033a7b..0f13d05e44 100644
+--- a/cpp/src/parquet/arrow/writer.h
++++ b/cpp/src/parquet/arrow/writer.h
+@@ -138,6 +138,9 @@ class PARQUET_EXPORT FileWriter {
+   /// option in this case.
+   virtual ::arrow::Status WriteRecordBatch(const ::arrow::RecordBatch& batch) 
= 0;
+
++  /// \brief Return the buffered size in bytes.
++  virtual int64_t GetBufferedSize() = 0;
++
+   /// \brief Write the footer and close the file.
+   virtual ::arrow::Status Close() = 0;
+   virtual ~FileWriter();
+diff --git a/cpp/src/parquet/properties.h b/cpp/src/parquet/properties.h
+index 4d3acb491e..3906ff3c59 100644
+--- a/cpp/src/parquet/properties.h
++++ b/cpp/src/parquet/properties.h
+@@ -139,6 +139,7 @@ static constexpr bool DEFAULT_IS_DICTIONARY_ENABLED = true;
+ static constexpr int64_t DEFAULT_DICTIONARY_PAGE_SIZE_LIMIT = 
kDefaultDataPageSize;
+ static constexpr int64_t DEFAULT_WRITE_BATCH_SIZE = 1024;
+ static constexpr int64_t DEFAULT_MAX_ROW_GROUP_LENGTH = 1024 * 1024;
++static constexpr int64_t DEFAULT_MAX_ROW_GROUP_SIZE = 128 * 1024 * 1024;
+ static constexpr bool DEFAULT_ARE_STATISTICS_ENABLED = true;
+ static constexpr int64_t DEFAULT_MAX_STATISTICS_SIZE = 4096;
+ static constexpr Encoding::type DEFAULT_ENCODING = Encoding::UNKNOWN;
+@@ -232,6 +233,7 @@ class PARQUET_EXPORT WriterProperties {
+           dictionary_pagesize_limit_(DEFAULT_DICTIONARY_PAGE_SIZE_LIMIT),
+           write_batch_size_(DEFAULT_WRITE_BATCH_SIZE),
+           max_row_group_length_(DEFAULT_MAX_ROW_GROUP_LENGTH),
++          max_row_group_size_(DEFAULT_MAX_ROW_GROUP_SIZE),
+           pagesize_(kDefaultDataPageSize),
+           version_(ParquetVersion::PARQUET_2_6),
+           data_page_version_(ParquetDataPageVersion::V1),
+@@ -244,6 +246,7 @@ class PARQUET_EXPORT WriterProperties {
+           dictionary_pagesize_limit_(properties.dictionary_pagesize_limit()),
+           write_batch_size_(properties.write_batch_size()),
+           max_row_group_length_(properties.max_row_group_length()),
++          max_row_group_size_(properties.max_row_group_size()),
+           pagesize_(properties.data_pagesize()),
+           version_(properties.version()),
+           data_page_version_(properties.data_page_version()),
+@@ -321,6 +324,13 @@ class PARQUET_EXPORT WriterProperties {
+       return this;
+     }
+
++    /// Specify the max bytes size to put in a single row group.
++    /// Default 128 M.
++    Builder* max_row_group_size(int64_t max_row_group_size) {
++      max_row_group_size_ = max_row_group_size;
++      return this;
++    }
++
+     /// Specify the data page size.
+     /// Default 1MB.
+     Builder* data_pagesize(int64_t pg_size) {
+@@ -664,7 +674,7 @@ class PARQUET_EXPORT WriterProperties {
+
+       return std::shared_ptr<WriterProperties>(new WriterProperties(
+           pool_, dictionary_pagesize_limit_, write_batch_size_, 
max_row_group_length_,
+-          pagesize_, version_, created_by_, page_checksum_enabled_,
++          max_row_group_size_, pagesize_, version_, created_by_, 
page_checksum_enabled_,
+           std::move(file_encryption_properties_), default_column_properties_,
+           column_properties, data_page_version_, store_decimal_as_integer_,
+           std::move(sorting_columns_)));
+@@ -675,6 +685,7 @@ class PARQUET_EXPORT WriterProperties {
+     int64_t dictionary_pagesize_limit_;
+     int64_t write_batch_size_;
+     int64_t max_row_group_length_;
++    int64_t max_row_group_size_;
+     int64_t pagesize_;
+     ParquetVersion::type version_;
+     ParquetDataPageVersion data_page_version_;
+@@ -705,6 +716,8 @@ class PARQUET_EXPORT WriterProperties {
+
+   inline int64_t max_row_group_length() const { return max_row_group_length_; 
}
+
++  inline int64_t max_row_group_size() const { return max_row_group_size_; }
++
+   inline int64_t data_pagesize() const { return pagesize_; }
+
+   inline ParquetDataPageVersion data_page_version() const {
+@@ -810,7 +823,7 @@ class PARQUET_EXPORT WriterProperties {
+  private:
+   explicit WriterProperties(
+       MemoryPool* pool, int64_t dictionary_pagesize_limit, int64_t 
write_batch_size,
+-      int64_t max_row_group_length, int64_t pagesize, ParquetVersion::type 
version,
++      int64_t max_row_group_length, int64_t max_row_group_size, int64_t 
pagesize, ParquetVersion::type version,
+       const std::string& created_by, bool page_write_checksum_enabled,
+       std::shared_ptr<FileEncryptionProperties> file_encryption_properties,
+       const ColumnProperties& default_column_properties,
+@@ -821,6 +834,7 @@ class PARQUET_EXPORT WriterProperties {
+         dictionary_pagesize_limit_(dictionary_pagesize_limit),
+         write_batch_size_(write_batch_size),
+         max_row_group_length_(max_row_group_length),
++        max_row_group_size_(max_row_group_size),
+         pagesize_(pagesize),
+         parquet_data_page_version_(data_page_version),
+         parquet_version_(version),
+@@ -836,6 +850,7 @@ class PARQUET_EXPORT WriterProperties {
+   int64_t dictionary_pagesize_limit_;
+   int64_t write_batch_size_;
+   int64_t max_row_group_length_;
++  int64_t max_row_group_size_;
+   int64_t pagesize_;
+   ParquetDataPageVersion parquet_data_page_version_;
+   ParquetVersion::type parquet_version_;
+diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake 
b/cpp/cmake_modules/ThirdpartyToolchain.cmake
+--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
++++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
+@@ -981,6 +981,11 @@ if(CMAKE_TOOLCHAIN_FILE)
+   list(APPEND EP_COMMON_CMAKE_ARGS 
-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE})
+ endif()
+
++# Compatibility with bundled dependencies that require old CMake versions.
++if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.30")
++  list(APPEND EP_COMMON_CMAKE_ARGS -DCMAKE_POLICY_VERSION_MINIMUM=3.5)
++endif()
++
+ # and crosscompiling emulator (for try_run() )
+ if(CMAKE_CROSSCOMPILING_EMULATOR)
+   string(REPLACE ";" ${EP_LIST_SEPARATOR} EP_CMAKE_CROSSCOMPILING_EMULATOR
diff --git a/cmake_modules/jieba.diff b/cmake_modules/jieba.diff
new file mode 100644
index 0000000..d74c3f6
--- /dev/null
+++ b/cmake_modules/jieba.diff
@@ -0,0 +1,16 @@
+diff --git a/include/cppjieba/KeywordExtractor.hpp 
b/include/cppjieba/KeywordExtractor.hpp
+index 24b2c40..c7c6a94 100644
+--- a/include/cppjieba/KeywordExtractor.hpp
++++ b/include/cppjieba/KeywordExtractor.hpp
+@@ -89,6 +89,11 @@ class KeywordExtractor {
+     std::partial_sort(keywords.begin(), keywords.begin() + topN, 
keywords.end(), Compare);
+     keywords.resize(topN);
+   }
++
++  const std::unordered_set<std::string>& GetStopWords() const {
++    return stopWords_;
++  }
++
+  private:
+   void LoadIdfDict(const std::string& idfPath) {
+     std::ifstream ifs(idfPath.c_str());
diff --git a/cmake_modules/orc.diff b/cmake_modules/orc.diff
new file mode 100644
index 0000000..e4ca4e2
--- /dev/null
+++ b/cmake_modules/orc.diff
@@ -0,0 +1,437 @@
+diff --git a/c++/include/orc/MemoryPool.hh b/c++/include/orc/MemoryPool.hh
+index a914e5f26..efe1d4933 100644
+--- a/c++/include/orc/MemoryPool.hh
++++ b/c++/include/orc/MemoryPool.hh
+@@ -42,13 +42,15 @@ namespace orc {
+     uint64_t currentSize_;
+     // maximal capacity (actual allocated memory)
+     uint64_t currentCapacity_;
++    // flag to indicate whether it needs to manage buffer or not
++    bool ownBuffer_;
+
+     // not implemented
+     DataBuffer(DataBuffer& buffer);
+     DataBuffer& operator=(DataBuffer& buffer);
+
+    public:
+-    DataBuffer(MemoryPool& pool, uint64_t size = 0);
++    DataBuffer(MemoryPool& pool, uint64_t size = 0, bool ownBuf = true);
+
+     DataBuffer(DataBuffer<T>&& buffer) noexcept;
+
+@@ -81,6 +83,10 @@ namespace orc {
+     void reserve(uint64_t size);
+     void resize(uint64_t size);
+     void zeroOut();
++
++    // set external buffer
++    void setData(T* buf, size_t bufSize);
++
+   };
+
+   // Specializations for char
+diff --git a/c++/include/orc/Reader.hh b/c++/include/orc/Reader.hh
+index b015b6491..585e50ec5 100644
+--- a/c++/include/orc/Reader.hh
++++ b/c++/include/orc/Reader.hh
+@@ -659,6 +659,9 @@ namespace orc {
+     virtual void preBuffer(const std::vector<uint32_t>& stripes,
+                            const std::list<uint64_t>& includeTypes) = 0;
+
++    virtual std::vector<std::pair<uint64_t, uint64_t>> preBufferRange(
++        const std::vector<uint32_t>& stripes, const std::list<uint64_t>& includeTypes) = 0;
++
+     /**
+      * Release cached entries whose right boundary is less than or equal to the given boundary.
+      * @param boundary the boundary value to release cache entries
+diff --git a/c++/src/ColumnReader.cc b/c++/src/ColumnReader.cc
+index af434c37c..08393259c 100644
+--- a/c++/src/ColumnReader.cc
++++ b/c++/src/ColumnReader.cc
+@@ -332,7 +332,13 @@ namespace orc {
+             nanoBuffer[i] *= 10;
+           }
+         }
++
++        // ORC-306: compensate -1s for JDK bug in java.sql.Timestamp
+         int64_t writerTime = secsBuffer[i] + epochOffset_;
++        if (writerTime < 0 && nanoBuffer[i] > 999999) {
++            writerTime -= 1;
++        }
++
+         if (!sameTimezone_) {
+           // adjust timestamp value to same wall clock time if writer and reader
+           // time zones have different rules, which is required for Apache Orc.
+@@ -347,9 +353,6 @@ namespace orc {
+           }
+         }
+         secsBuffer[i] = writerTime;
+-        if (secsBuffer[i] < 0 && nanoBuffer[i] > 999999) {
+-          secsBuffer[i] -= 1;
+-        }
+       }
+     }
+   }
+diff --git a/c++/src/MemoryPool.cc b/c++/src/MemoryPool.cc
+index ed7fee737..a8ee8a67c 100644
+--- a/c++/src/MemoryPool.cc
++++ b/c++/src/MemoryPool.cc
+@@ -52,8 +52,8 @@ namespace orc {
+   }
+
+   template <class T>
+-  DataBuffer<T>::DataBuffer(MemoryPool& pool, uint64_t newSize)
+-      : memoryPool_(pool), buf_(nullptr), currentSize_(0), currentCapacity_(0) {
++  DataBuffer<T>::DataBuffer(MemoryPool& pool, uint64_t newSize, bool ownBuf)
++      : memoryPool_(pool), buf_(nullptr), currentSize_(0), currentCapacity_(0), ownBuffer_(ownBuf) {
+     reserve(newSize);
+     currentSize_ = newSize;
+   }
+@@ -63,24 +63,35 @@ namespace orc {
+       : memoryPool_(buffer.memoryPool_),
+         buf_(buffer.buf_),
+         currentSize_(buffer.currentSize_),
+-        currentCapacity_(buffer.currentCapacity_) {
+-    buffer.buf_ = nullptr;
+-    buffer.currentSize_ = 0;
+-    buffer.currentCapacity_ = 0;
++        currentCapacity_(buffer.currentCapacity_),
++        ownBuffer_(buffer.ownBuffer_) {
++    if (buffer.ownBuffer_) {
++      buffer.buf_ = nullptr;
++      buffer.currentSize_ = 0;
++      buffer.currentCapacity_ = 0;
++    }
+   }
+
+   template <class T>
+   DataBuffer<T>::~DataBuffer() {
++    if (!ownBuffer_) {
++      return;
++    }
+     for (uint64_t i = currentSize_; i > 0; --i) {
+       (buf_ + i - 1)->~T();
+     }
+     if (buf_) {
++      static_assert(std::is_trivially_copyable<T>::value,
++                    "Only trivially copyable type is supported for DataBuffer Reserve");
+       memoryPool_.free(reinterpret_cast<char*>(buf_));
+     }
+   }
+
+   template <class T>
+   void DataBuffer<T>::resize(uint64_t newSize) {
++    if (!ownBuffer_) {
++      return;
++    }
+     reserve(newSize);
+     if (currentSize_ > newSize) {
+       for (uint64_t i = currentSize_; i > newSize; --i) {
+@@ -96,6 +107,9 @@ namespace orc {
+
+   template <class T>
+   void DataBuffer<T>::reserve(uint64_t newCapacity) {
++    if (!ownBuffer_) {
++      return;
++    }
+     if (newCapacity > currentCapacity_ || !buf_) {
+       if (buf_) {
+         T* buf_old = buf_;
+@@ -114,6 +128,18 @@ namespace orc {
+     memset(buf_, 0, sizeof(T) * currentCapacity_);
+   }
+
++  template <class T>
++  void DataBuffer<T>::setData(T* buffer, size_t bufSize) {
++    if (ownBuffer_ && buf_) {
++        static_assert(std::is_trivially_copyable<T>::value,
++                      "Only trivially copyable type is supported for DataBuffer Reserve");
++      memoryPool_.free(reinterpret_cast<char*>(buf_));
++    }
++    ownBuffer_ = false;
++    buf_ = buffer;
++    currentSize_ = currentCapacity_ = bufSize / sizeof(T);
++  }
++
+   // Specializations for Int128
+   template <>
+   void DataBuffer<Int128>::zeroOut() {
+@@ -126,13 +152,16 @@ namespace orc {
+
+   template <>
+   DataBuffer<char>::~DataBuffer() {
+-    if (buf_) {
++    if (ownBuffer_ && buf_) {
+       memoryPool_.free(reinterpret_cast<char*>(buf_));
+     }
+   }
+
+   template <>
+   void DataBuffer<char>::resize(uint64_t newSize) {
++    if (!ownBuffer_) {
++      return;
++    }
+     reserve(newSize);
+     if (newSize > currentSize_) {
+       memset(buf_ + currentSize_, 0, newSize - currentSize_);
+@@ -144,13 +173,16 @@ namespace orc {
+
+   template <>
+   DataBuffer<char*>::~DataBuffer() {
+-    if (buf_) {
++    if (ownBuffer_ && buf_) {
+       memoryPool_.free(reinterpret_cast<char*>(buf_));
+     }
+   }
+
+   template <>
+   void DataBuffer<char*>::resize(uint64_t newSize) {
++    if (!ownBuffer_) {
++      return;
++    }
+     reserve(newSize);
+     if (newSize > currentSize_) {
+       memset(buf_ + currentSize_, 0, (newSize - currentSize_) * sizeof(char*));
+@@ -162,13 +194,16 @@ namespace orc {
+
+   template <>
+   DataBuffer<double>::~DataBuffer() {
+-    if (buf_) {
++    if (ownBuffer_ && buf_) {
+       memoryPool_.free(reinterpret_cast<char*>(buf_));
+     }
+   }
+
+   template <>
+   void DataBuffer<double>::resize(uint64_t newSize) {
++    if (!ownBuffer_) {
++      return;
++    }
+     reserve(newSize);
+     if (newSize > currentSize_) {
+       memset(buf_ + currentSize_, 0, (newSize - currentSize_) * sizeof(double));
+@@ -180,13 +215,16 @@ namespace orc {
+
+   template <>
+   DataBuffer<float>::~DataBuffer() {
+-    if (buf_) {
++    if (ownBuffer_ && buf_) {
+       memoryPool_.free(reinterpret_cast<char*>(buf_));
+     }
+   }
+
+   template <>
+   void DataBuffer<float>::resize(uint64_t newSize) {
++    if (!ownBuffer_) {
++      return;
++    }
+     reserve(newSize);
+     if (newSize > currentSize_) {
+       memset(buf_ + currentSize_, 0, (newSize - currentSize_) * sizeof(float));
+@@ -198,13 +236,17 @@ namespace orc {
+
+   template <>
+   DataBuffer<int64_t>::~DataBuffer() {
+-    if (buf_) {
++    if (ownBuffer_ && buf_) {
+       memoryPool_.free(reinterpret_cast<char*>(buf_));
+     }
+   }
+
+   template <>
+   void DataBuffer<int64_t>::resize(uint64_t newSize) {
++    if (!ownBuffer_) {
++      return;
++    }
++
+     reserve(newSize);
+     if (newSize > currentSize_) {
+       memset(buf_ + currentSize_, 0, (newSize - currentSize_) * sizeof(int64_t));
+@@ -216,13 +258,17 @@ namespace orc {
+
+   template <>
+   DataBuffer<int32_t>::~DataBuffer() {
+-    if (buf_) {
++    if (ownBuffer_ && buf_) {
+       memoryPool_.free(reinterpret_cast<char*>(buf_));
+     }
+   }
+
+   template <>
+   void DataBuffer<int32_t>::resize(uint64_t newSize) {
++    if (!ownBuffer_) {
++      return;
++    }
++
+     reserve(newSize);
+     if (newSize > currentSize_) {
+       memset(buf_ + currentSize_, 0, (newSize - currentSize_) * sizeof(int32_t));
+@@ -234,13 +280,17 @@ namespace orc {
+
+   template <>
+   DataBuffer<int16_t>::~DataBuffer() {
+-    if (buf_) {
++    if (ownBuffer_ && buf_) {
+       memoryPool_.free(reinterpret_cast<char*>(buf_));
+     }
+   }
+
+   template <>
+   void DataBuffer<int16_t>::resize(uint64_t newSize) {
++    if (!ownBuffer_) {
++      return;
++    }
++
+     reserve(newSize);
+     if (newSize > currentSize_) {
+       memset(buf_ + currentSize_, 0, (newSize - currentSize_) * sizeof(int16_t));
+@@ -252,13 +302,17 @@ namespace orc {
+
+   template <>
+   DataBuffer<int8_t>::~DataBuffer() {
+-    if (buf_) {
++    if (ownBuffer_ && buf_) {
+       memoryPool_.free(reinterpret_cast<char*>(buf_));
+     }
+   }
+
+   template <>
+   void DataBuffer<int8_t>::resize(uint64_t newSize) {
++    if (!ownBuffer_) {
++      return;
++    }
++
+     reserve(newSize);
+     if (newSize > currentSize_) {
+       memset(buf_ + currentSize_, 0, (newSize - currentSize_) * sizeof(int8_t));
+@@ -270,13 +324,17 @@ namespace orc {
+
+   template <>
+   DataBuffer<uint64_t>::~DataBuffer() {
+-    if (buf_) {
++    if (ownBuffer_ && buf_) {
+       memoryPool_.free(reinterpret_cast<char*>(buf_));
+     }
+   }
+
+   template <>
+   void DataBuffer<uint64_t>::resize(uint64_t newSize) {
++    if (!ownBuffer_) {
++      return;
++    }
++
+     reserve(newSize);
+     if (newSize > currentSize_) {
+       memset(buf_ + currentSize_, 0, (newSize - currentSize_) * sizeof(uint64_t));
+@@ -288,13 +346,17 @@ namespace orc {
+
+   template <>
+   DataBuffer<unsigned char>::~DataBuffer() {
+-    if (buf_) {
++    if (ownBuffer_ && buf_) {
+       memoryPool_.free(reinterpret_cast<char*>(buf_));
+     }
+   }
+
+   template <>
+   void DataBuffer<unsigned char>::resize(uint64_t newSize) {
++    if (!ownBuffer_) {
++      return;
++    }
++
+     reserve(newSize);
+     if (newSize > currentSize_) {
+       memset(buf_ + currentSize_, 0, newSize - currentSize_);
+diff --git a/c++/src/Reader.cc b/c++/src/Reader.cc
+index c93c62f6c..2a821b622 100644
+--- a/c++/src/Reader.cc
++++ b/c++/src/Reader.cc
+@@ -1531,8 +1531,8 @@ namespace orc {
+     }
+   }
+
+-  void ReaderImpl::preBuffer(const std::vector<uint32_t>& stripes,
+-                             const std::list<uint64_t>& includeTypes) {
++  std::vector<std::pair<uint64_t, uint64_t>> ReaderImpl::preBufferRange(
++      const std::vector<uint32_t>& stripes, const std::list<uint64_t>& includeTypes) {
+     std::vector<uint32_t> newStripes;
+     for (auto stripe : stripes) {
+       if (stripe < static_cast<uint32_t>(footer_->stripes_size())) newStripes.push_back(stripe);
+@@ -1544,7 +1544,7 @@ namespace orc {
+     }
+
+     if (newStripes.empty() || newIncludeTypes.empty()) {
+-      return;
++      return {};
+     }
+
+     orc::RowReaderOptions rowReaderOptions;
+@@ -1553,7 +1553,7 @@ namespace orc {
+     std::vector<bool> selectedColumns;
+     columnSelector.updateSelected(selectedColumns, rowReaderOptions);
+
+-    std::vector<ReadRange> ranges;
++    std::vector<std::pair<uint64_t, uint64_t>> ranges;
+     ranges.reserve(newIncludeTypes.size());
+     for (auto stripe : newStripes) {
+       // get stripe information
+@@ -1598,17 +1598,23 @@ namespace orc {
+
+         offset += stream.length();
+       }
++    }
++    return ranges;
++  }
+
+-      {
+-        std::lock_guard<std::mutex> lock(contents_->readCacheMutex);
+-
+-        if (!contents_->readCache) {
+-          contents_->readCache = std::make_shared<ReadRangeCache>(
+-              getStream(), options_.getCacheOptions(), contents_->pool, contents_->readerMetrics);
+-        }
+-        contents_->readCache->cache(std::move(ranges));
+-      }
++  void ReaderImpl::preBuffer(const std::vector<uint32_t>& stripes,
++                             const std::list<uint64_t>& includeTypes) {
++    auto ranges = preBufferRange(stripes, includeTypes);
++    std::vector<ReadRange> read_ranges;
++    for (const auto& range : ranges) {
++      read_ranges.emplace_back(range.first, range.second);
++    }
++    std::lock_guard<std::mutex> lock(contents_->readCacheMutex);
++    if (!contents_->readCache) {
++      contents_->readCache = std::make_shared<ReadRangeCache>(
++          getStream(), options_.getCacheOptions(), contents_->pool, contents_->readerMetrics);
+     }
++    contents_->readCache->cache(std::move(read_ranges));
+   }
+
+   RowReader::~RowReader() {
+diff --git a/c++/src/Reader.hh b/c++/src/Reader.hh
+index 39ca73967..13da45a49 100644
+--- a/c++/src/Reader.hh
++++ b/c++/src/Reader.hh
+@@ -387,6 +387,9 @@ namespace orc {
+     std::map<uint32_t, BloomFilterIndex> getBloomFilters(
+         uint32_t stripeIndex, const std::set<uint32_t>& included) const override;
+
++    std::vector<std::pair<uint64_t, uint64_t>> preBufferRange(
++        const std::vector<uint32_t>& stripes, const std::list<uint64_t>& includeTypes) override;
++
+     void preBuffer(const std::vector<uint32_t>& stripes,
+                    const std::list<uint64_t>& includeTypes) override;
+     void releaseBuffer(uint64_t boundary) override;
+diff --git a/cmake_modules/ThirdpartyToolchain.cmake b/cmake_modules/ThirdpartyToolchain.cmake
+index 9b2c829c7..434841224 100644
+--- a/cmake_modules/ThirdpartyToolchain.cmake
++++ b/cmake_modules/ThirdpartyToolchain.cmake
+@@ -19,6 +19,8 @@ set(ORC_VENDOR_DEPENDENCIES)
+ set(ORC_SYSTEM_DEPENDENCIES)
+ set(ORC_INSTALL_INTERFACE_TARGETS)
+
++set(BUILD_POSITION_INDEPENDENT_LIB ON)
++
+ set(ORC_FORMAT_VERSION "1.0.0")
+ set(LZ4_VERSION "1.10.0")
+ set(SNAPPY_VERSION "1.2.1")

Reply via email to