This is an automated email from the ASF dual-hosted git repository. leaves12138 pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/paimon-cpp.git
commit 0a59d4496d25f9d8b90300c1590ad7130faaee79 Author: yonghao.fyh <[email protected]> AuthorDate: Fri May 22 17:50:48 2026 +0800 chore: add thirdparty toolchain and thirdparty diff --- cmake_modules/ThirdpartyToolchain.cmake | 1787 +++++++++++++++++++++++++++++++ cmake_modules/arrow.diff | 213 ++++ cmake_modules/jieba.diff | 16 + cmake_modules/orc.diff | 437 ++++++++ 4 files changed, 2453 insertions(+) diff --git a/cmake_modules/ThirdpartyToolchain.cmake b/cmake_modules/ThirdpartyToolchain.cmake new file mode 100644 index 0000000..2be9097 --- /dev/null +++ b/cmake_modules/ThirdpartyToolchain.cmake @@ -0,0 +1,1787 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +set(THIRDPARTY_LOG_OPTIONS + LOG_CONFIGURE + 1 + LOG_BUILD + 1 + LOG_INSTALL + 1 + LOG_DOWNLOAD + 1) +set(THIRDPARTY_CONFIGURE_COMMAND "${CMAKE_COMMAND}" -G "${CMAKE_GENERATOR}") +if(CMAKE_GENERATOR_TOOLSET) + list(APPEND THIRDPARTY_CONFIGURE_COMMAND -T "${CMAKE_GENERATOR_TOOLSET}") +endif() + +# CMAKE_BUILD_TYPE is quoted because it may be empty (multi-config generators, or no +# build type given); an empty unquoted expansion would leave string() without its +# input argument and abort configuration. +string(TOUPPER "${CMAKE_BUILD_TYPE}" UPPERCASE_BUILD_TYPE) +string(TOLOWER "${CMAKE_BUILD_TYPE}" LOWERCASE_BUILD_TYPE) + +set(EP_COMMON_TOOLCHAIN "-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}" + "-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}") + +# set_urls(<var> <url>...): stores the given URL list in <var>. +macro(set_urls URLS) + set(${URLS} ${ARGN}) +endmacro() + +set(THIRDPARTY_DIR "${CMAKE_SOURCE_DIR}/third_party") +# Read toolchain versions from third_party/versions.txt +file(STRINGS "${THIRDPARTY_DIR}/versions.txt" TOOLCHAIN_VERSIONS_TXT) +foreach(_VERSION_ENTRY ${TOOLCHAIN_VERSIONS_TXT}) + # Exclude comments + if(NOT + ((_VERSION_ENTRY MATCHES "^[^#][A-Za-z0-9-_]+_VERSION=") + OR (_VERSION_ENTRY MATCHES "^[^#][A-Za-z0-9-_]+_CHECKSUM=") + OR (_VERSION_ENTRY MATCHES "^[^#][A-Za-z0-9-_]+_PKG_NAME="))) + continue() + endif() + + string(REGEX MATCH "^[^=]*" _VARIABLE_NAME ${_VERSION_ENTRY}) + string(REPLACE "${_VARIABLE_NAME}=" "" _VARIABLE_VALUE ${_VERSION_ENTRY}) + + # Skip blank or malformed lines + if(_VARIABLE_VALUE STREQUAL "") + continue() + endif() + + set(${_VARIABLE_NAME} ${_VARIABLE_VALUE}) + + if(_VARIABLE_NAME MATCHES "_PKG_NAME$") + # Expand version to package name + string(CONFIGURE "${${_VARIABLE_NAME}}" _EXPANDED ESCAPE_QUOTES) + set(${_VARIABLE_NAME} "${_EXPANDED}") + endif() + + # For debugging + message(STATUS "${_VARIABLE_NAME}: ${${_VARIABLE_NAME}}") +endforeach() + +# Optional mirror prefix taken from the environment; empty when the variable is unset. +if(DEFINED ENV{PAIMON_THIRDPARTY_MIRROR_URL}) + set(THIRDPARTY_MIRROR_URL "$ENV{PAIMON_THIRDPARTY_MIRROR_URL}") +else() + set(THIRDPARTY_MIRROR_URL "") +endif() + +if(DEFINED ENV{PAIMON_ARROW_URL}) + set(ARROW_SOURCE_URL "$ENV{PAIMON_ARROW_URL}") +else() + if(EXISTS "${THIRDPARTY_DIR}/${PAIMON_ARROW_PKG_NAME}") + set_urls(ARROW_SOURCE_URL
"${THIRDPARTY_DIR}/${PAIMON_ARROW_PKG_NAME}") + else() + set_urls(ARROW_SOURCE_URL + "${THIRDPARTY_MIRROR_URL}https://github.com/apache/arrow/releases/download/apache-arrow-${PAIMON_ARROW_BUILD_VERSION}/apache-arrow-${PAIMON_ARROW_BUILD_VERSION}.tar.gz" + ) + endif() +endif() + +# Each <DEP>_SOURCE_URL below is resolved in the same order: the PAIMON_<DEP>_URL +# environment variable, then the package file under THIRDPARTY_DIR if it exists, +# then the upstream URL prefixed with THIRDPARTY_MIRROR_URL. +if(DEFINED ENV{PAIMON_RAPIDJSON_URL}) + set(RAPIDJSON_SOURCE_URL "$ENV{PAIMON_RAPIDJSON_URL}") +else() + if(EXISTS "${THIRDPARTY_DIR}/${PAIMON_RAPIDJSON_PKG_NAME}") + set_urls(RAPIDJSON_SOURCE_URL "${THIRDPARTY_DIR}/${PAIMON_RAPIDJSON_PKG_NAME}") + else() + set_urls(RAPIDJSON_SOURCE_URL + "${THIRDPARTY_MIRROR_URL}https://github.com/miloyip/rapidjson/archive/${PAIMON_RAPIDJSON_BUILD_VERSION}.tar.gz" + ) + endif() +endif() + +if(DEFINED ENV{PAIMON_FMT_URL}) + set(FMT_SOURCE_URL "$ENV{PAIMON_FMT_URL}") +else() + if(EXISTS "${THIRDPARTY_DIR}/${PAIMON_FMT_PKG_NAME}") + set_urls(FMT_SOURCE_URL "${THIRDPARTY_DIR}/${PAIMON_FMT_PKG_NAME}") + else() + set_urls(FMT_SOURCE_URL + "${THIRDPARTY_MIRROR_URL}https://github.com/fmtlib/fmt/archive/refs/tags/${PAIMON_FMT_BUILD_VERSION}.tar.gz" + ) + endif() +endif() + +if(DEFINED ENV{PAIMON_LUCENE_URL}) + set(LUCENE_SOURCE_URL "$ENV{PAIMON_LUCENE_URL}") +else() + if(EXISTS "${THIRDPARTY_DIR}/${PAIMON_LUCENE_PKG_NAME}") + set_urls(LUCENE_SOURCE_URL "${THIRDPARTY_DIR}/${PAIMON_LUCENE_PKG_NAME}") + else() + set_urls(LUCENE_SOURCE_URL + "${THIRDPARTY_MIRROR_URL}https://github.com/luceneplusplus/LucenePlusPlus/archive/refs/tags/rel_${PAIMON_LUCENE_BUILD_VERSION}.tar.gz" + ) + endif() +endif() + +if(DEFINED ENV{PAIMON_LIMONP_URL}) + set(LIMONP_SOURCE_URL "$ENV{PAIMON_LIMONP_URL}") +else() + if(EXISTS "${THIRDPARTY_DIR}/${PAIMON_LIMONP_PKG_NAME}") + set_urls(LIMONP_SOURCE_URL "${THIRDPARTY_DIR}/${PAIMON_LIMONP_PKG_NAME}") + else() + set_urls(LIMONP_SOURCE_URL + "${THIRDPARTY_MIRROR_URL}https://github.com/yanyiwu/limonp/archive/refs/tags/v${PAIMON_LIMONP_BUILD_VERSION}.tar.gz" + ) + endif() +endif() + +if(DEFINED ENV{PAIMON_JIEBA_URL}) + set(JIEBA_SOURCE_URL
"$ENV{PAIMON_JIEBA_URL}") +else() + if(EXISTS "${THIRDPARTY_DIR}/${PAIMON_JIEBA_PKG_NAME}") + set_urls(JIEBA_SOURCE_URL "${THIRDPARTY_DIR}/${PAIMON_JIEBA_PKG_NAME}") + else() + set_urls(JIEBA_SOURCE_URL + "${THIRDPARTY_MIRROR_URL}https://github.com/yanyiwu/cppjieba/archive/refs/tags/${PAIMON_JIEBA_BUILD_VERSION}.tar.gz" + ) + endif() +endif() + +# Same resolution order for each dependency below: PAIMON_<DEP>_URL environment +# variable, then the package file under THIRDPARTY_DIR if it exists, then the +# upstream URL prefixed with THIRDPARTY_MIRROR_URL. +if(DEFINED ENV{PAIMON_RE2_URL}) + set(RE2_SOURCE_URL "$ENV{PAIMON_RE2_URL}") +else() + if(EXISTS "${THIRDPARTY_DIR}/${PAIMON_RE2_PKG_NAME}") + set_urls(RE2_SOURCE_URL "${THIRDPARTY_DIR}/${PAIMON_RE2_PKG_NAME}") + else() + set_urls(RE2_SOURCE_URL + "${THIRDPARTY_MIRROR_URL}https://github.com/google/re2/archive/${PAIMON_RE2_BUILD_VERSION}.tar.gz" + ) + endif() +endif() + +if(DEFINED ENV{PAIMON_GLOG_URL}) + set(GLOG_SOURCE_URL "$ENV{PAIMON_GLOG_URL}") +else() + if(EXISTS "${THIRDPARTY_DIR}/${PAIMON_GLOG_PKG_NAME}") + set_urls(GLOG_SOURCE_URL "${THIRDPARTY_DIR}/${PAIMON_GLOG_PKG_NAME}") + else() + set_urls(GLOG_SOURCE_URL + "${THIRDPARTY_MIRROR_URL}https://github.com/google/glog/archive/${PAIMON_GLOG_BUILD_VERSION}.tar.gz" + ) + endif() +endif() + +if(DEFINED ENV{PAIMON_ZLIB_URL}) + set(ZLIB_SOURCE_URL "$ENV{PAIMON_ZLIB_URL}") +else() + if(EXISTS "${THIRDPARTY_DIR}/${PAIMON_ZLIB_PKG_NAME}") + set_urls(ZLIB_SOURCE_URL "${THIRDPARTY_DIR}/${PAIMON_ZLIB_PKG_NAME}") + else() + set_urls(ZLIB_SOURCE_URL + "${THIRDPARTY_MIRROR_URL}https://github.com/madler/zlib/releases/download/v${PAIMON_ZLIB_BUILD_VERSION}/zlib-${PAIMON_ZLIB_BUILD_VERSION}.tar.gz" + ) + endif() +endif() + +if(DEFINED ENV{PAIMON_ZSTD_URL}) + set(ZSTD_SOURCE_URL "$ENV{PAIMON_ZSTD_URL}") +else() + if(EXISTS "${THIRDPARTY_DIR}/${PAIMON_ZSTD_PKG_NAME}") + set_urls(ZSTD_SOURCE_URL "${THIRDPARTY_DIR}/${PAIMON_ZSTD_PKG_NAME}") + else() + set_urls(ZSTD_SOURCE_URL + "${THIRDPARTY_MIRROR_URL}https://github.com/facebook/zstd/releases/download/v${PAIMON_ZSTD_BUILD_VERSION}/zstd-${PAIMON_ZSTD_BUILD_VERSION}.tar.gz" + ) + endif() +endif() + +if(DEFINED ENV{PAIMON_LZ4_URL}) +
set(LZ4_SOURCE_URL "$ENV{PAIMON_LZ4_URL}") +else() + if(EXISTS "${THIRDPARTY_DIR}/${PAIMON_LZ4_PKG_NAME}") + set_urls(LZ4_SOURCE_URL "${THIRDPARTY_DIR}/${PAIMON_LZ4_PKG_NAME}") + else() + set_urls(LZ4_SOURCE_URL + "${THIRDPARTY_MIRROR_URL}https://github.com/lz4/lz4/archive/${PAIMON_LZ4_BUILD_VERSION}.tar.gz" + ) + endif() +endif() + +# Same resolution order for each dependency below: PAIMON_<DEP>_URL environment +# variable, then the package file under THIRDPARTY_DIR if it exists, then the +# upstream URL prefixed with THIRDPARTY_MIRROR_URL. +if(DEFINED ENV{PAIMON_SNAPPY_URL}) + set(SNAPPY_SOURCE_URL "$ENV{PAIMON_SNAPPY_URL}") +else() + if(EXISTS "${THIRDPARTY_DIR}/${PAIMON_SNAPPY_PKG_NAME}") + set_urls(SNAPPY_SOURCE_URL "${THIRDPARTY_DIR}/${PAIMON_SNAPPY_PKG_NAME}") + else() + set_urls(SNAPPY_SOURCE_URL + "${THIRDPARTY_MIRROR_URL}https://github.com/google/snappy/archive/${PAIMON_SNAPPY_BUILD_VERSION}.tar.gz" + ) + endif() +endif() + +if(DEFINED ENV{PAIMON_PROTOBUF_URL}) + set(PROTOBUF_SOURCE_URL "$ENV{PAIMON_PROTOBUF_URL}") +else() + if(EXISTS "${THIRDPARTY_DIR}/${PAIMON_PROTOBUF_PKG_NAME}") + set_urls(PROTOBUF_SOURCE_URL "${THIRDPARTY_DIR}/${PAIMON_PROTOBUF_PKG_NAME}") + else() + set_urls(PROTOBUF_SOURCE_URL + "${THIRDPARTY_MIRROR_URL}https://github.com/protocolbuffers/protobuf/releases/download/v${PAIMON_PROTOBUF_BUILD_VERSION}/protobuf-all-${PAIMON_PROTOBUF_BUILD_VERSION}.tar.gz" + ) + endif() +endif() + +if(DEFINED ENV{PAIMON_GTEST_URL}) + set(GTEST_SOURCE_URL "$ENV{PAIMON_GTEST_URL}") +else() + if(EXISTS "${THIRDPARTY_DIR}/${PAIMON_GTEST_PKG_NAME}") + set_urls(GTEST_SOURCE_URL "${THIRDPARTY_DIR}/${PAIMON_GTEST_PKG_NAME}") + else() + set_urls(GTEST_SOURCE_URL + "${THIRDPARTY_MIRROR_URL}https://github.com/google/googletest/archive/release-${PAIMON_GTEST_BUILD_VERSION}.tar.gz" + ) + endif() +endif() + +if(DEFINED ENV{PAIMON_TBB_URL}) + set(TBB_SOURCE_URL "$ENV{PAIMON_TBB_URL}") +else() + if(EXISTS "${THIRDPARTY_DIR}/${PAIMON_TBB_PKG_NAME}") + set_urls(TBB_SOURCE_URL "${THIRDPARTY_DIR}/${PAIMON_TBB_PKG_NAME}") + else() + set_urls(TBB_SOURCE_URL + "${THIRDPARTY_MIRROR_URL}https://github.com/uxlfoundation/oneTBB/archive/refs/tags/${PAIMON_TBB_BUILD_VERSION}.tar.gz" + ) +
endif() +endif() + +if(DEFINED ENV{PAIMON_ORC_URL}) + set(ORC_SOURCE_URL "$ENV{PAIMON_ORC_URL}") +else() + if(EXISTS "${THIRDPARTY_DIR}/${PAIMON_ORC_PKG_NAME}") + set_urls(ORC_SOURCE_URL "${THIRDPARTY_DIR}/${PAIMON_ORC_PKG_NAME}") + else() + set_urls(ORC_SOURCE_URL + "${THIRDPARTY_MIRROR_URL}https://github.com/apache/orc/archive/refs/tags/${PAIMON_ORC_BUILD_VERSION}.tar.gz" + ) + endif() +endif() + +if(DEFINED ENV{PAIMON_AVRO_URL}) + set(AVRO_SOURCE_URL "$ENV{PAIMON_AVRO_URL}") +else() + if(EXISTS "${THIRDPARTY_DIR}/${PAIMON_AVRO_PKG_NAME}") + set_urls(AVRO_SOURCE_URL "${THIRDPARTY_DIR}/${PAIMON_AVRO_PKG_NAME}") + else() + set_urls(AVRO_SOURCE_URL + "${THIRDPARTY_MIRROR_URL}https://github.com/apache/avro/archive/${PAIMON_AVRO_BUILD_VERSION}.tar.gz" + ) + endif() +endif() + +# JindoSDK C: the library file name, SHA256 checksum and archive URL are selected by +# platform (APPLE vs. everything else) and by CMAKE_SYSTEM_PROCESSOR. Each URL can be +# overridden through a platform-specific PAIMON_JINDOSDK_C_<OS>_<ARCH>_URL environment +# variable. THIRDPARTY_MIRROR_URL is not applied to these URLs. +# NOTE: when CMAKE_SYSTEM_PROCESSOR matches neither pattern, no checksum or URL is set. +if(APPLE) + set(JINDOSDK_C_DYNAMIC_LIB_NAME "jindosdk_c.${PAIMON_JINDOSDK_C_BUILD_VERSION}") + set(JINDOSDK_C_DYNAMIC_LIB_FILE "lib${JINDOSDK_C_DYNAMIC_LIB_NAME}.dylib") + if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64|AMD64") + set(JINDOSDK_C_BUILD_SHA256_CHECKSUM + "${PAIMON_JINDOSDK_C_MACOS_X86_64_BUILD_SHA256_CHECKSUM}") + if(DEFINED ENV{PAIMON_JINDOSDK_C_MACOS_X86_64_URL}) + set(JINDOSDK_C_SOURCE_URL "$ENV{PAIMON_JINDOSDK_C_MACOS_X86_64_URL}") + else() + set_urls(JINDOSDK_C_SOURCE_URL + "https://jindodata-binary.oss-cn-shanghai.aliyuncs.com/release/${PAIMON_JINDOSDK_C_BUILD_VERSION}/jindosdk-${PAIMON_JINDOSDK_C_BUILD_VERSION}-macos-11_0-x86_64.tar.gz" + ) + endif() + elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|arm64|ARM64") + set(JINDOSDK_C_BUILD_SHA256_CHECKSUM + "${PAIMON_JINDOSDK_C_MACOS_AARCH64_BUILD_SHA256_CHECKSUM}") + if(DEFINED ENV{PAIMON_JINDOSDK_C_MACOS_AARCH64_URL}) + set(JINDOSDK_C_SOURCE_URL "$ENV{PAIMON_JINDOSDK_C_MACOS_AARCH64_URL}") + else() + set_urls(JINDOSDK_C_SOURCE_URL + "https://jindodata-binary.oss-cn-shanghai.aliyuncs.com/release/${PAIMON_JINDOSDK_C_BUILD_VERSION}/jindosdk-${PAIMON_JINDOSDK_C_BUILD_VERSION}-macos-11_0-aarch64.tar.gz" + ) + endif() + endif()
+else() + set(JINDOSDK_C_DYNAMIC_LIB_NAME "jindosdk_c") + set(JINDOSDK_C_DYNAMIC_LIB_FILE "lib${JINDOSDK_C_DYNAMIC_LIB_NAME}.so") + if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64|AMD64") + set(JINDOSDK_C_BUILD_SHA256_CHECKSUM + "${PAIMON_JINDOSDK_C_LINUX_X86_64_BUILD_SHA256_CHECKSUM}") + if(DEFINED ENV{PAIMON_JINDOSDK_C_LINUX_X86_64_URL}) + set(JINDOSDK_C_SOURCE_URL "$ENV{PAIMON_JINDOSDK_C_LINUX_X86_64_URL}") + else() + set_urls(JINDOSDK_C_SOURCE_URL + "https://jindodata-binary.oss-cn-shanghai.aliyuncs.com/release/${PAIMON_JINDOSDK_C_BUILD_VERSION}/jindosdk-${PAIMON_JINDOSDK_C_BUILD_VERSION}-linux.tar.gz" + ) + endif() + elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|arm64|ARM64") + set(JINDOSDK_C_BUILD_SHA256_CHECKSUM + "${PAIMON_JINDOSDK_C_LINUX_AARCH64_BUILD_SHA256_CHECKSUM}") + if(DEFINED ENV{PAIMON_JINDOSDK_C_LINUX_AARCH64_URL}) + set(JINDOSDK_C_SOURCE_URL "$ENV{PAIMON_JINDOSDK_C_LINUX_AARCH64_URL}") + else() + set_urls(JINDOSDK_C_SOURCE_URL + "https://jindodata-binary.oss-cn-shanghai.aliyuncs.com/release/${PAIMON_JINDOSDK_C_BUILD_VERSION}/jindosdk-${PAIMON_JINDOSDK_C_BUILD_VERSION}-linux-el7-aarch64.tar.gz" + ) + endif() + endif() +endif() + +# Compiler flags handed to external projects: start from the project's own flags and +# strip the ones that must not leak into third-party builds. +# The input of every string(REPLACE) is quoted: CMAKE_C_FLAGS / CMAKE_CXX_FLAGS may be +# empty, and an empty unquoted expansion would leave REPLACE with too few arguments. +set(EP_CXX_FLAGS "${CMAKE_CXX_FLAGS}") +set(EP_C_FLAGS "${CMAKE_C_FLAGS}") +string(REPLACE "-Wglobal-constructors" "" EP_CXX_FLAGS "${EP_CXX_FLAGS}") +string(REPLACE "-Wglobal-constructors" "" EP_C_FLAGS "${EP_C_FLAGS}") +# Remove coverage flags from third-party dependencies to avoid gcov dependency +string(REPLACE "--coverage" "" EP_CXX_FLAGS "${EP_CXX_FLAGS}") +string(REPLACE "--coverage" "" EP_C_FLAGS "${EP_C_FLAGS}") +string(REPLACE "-DCOVERAGE_BUILD" "" EP_CXX_FLAGS "${EP_CXX_FLAGS}") +string(REPLACE "-DCOVERAGE_BUILD" "" EP_C_FLAGS "${EP_C_FLAGS}") +if(NOT MSVC_TOOLCHAIN) + # Set -fPIC on all external projects + string(APPEND EP_CXX_FLAGS + " -fPIC -Wno-error -Wno-sign-compare -Wno-ignored-attributes") + string(APPEND EP_C_FLAGS " -fPIC") +endif() + +if(PAIMON_USE_CXX11_ABI) + string(APPEND EP_CXX_FLAGS " 
-D_GLIBCXX_USE_CXX11_ABI=1") +else() + string(APPEND EP_CXX_FLAGS " -D_GLIBCXX_USE_CXX11_ABI=0") +endif() + +# External projects are still able to override the following declarations. +# cmake command line will favor the last defined variable when a duplicate is +# encountered. This requires that `EP_COMMON_CMAKE_ARGS` is always the first +# argument. +set(EP_COMMON_CMAKE_ARGS + ${EP_COMMON_TOOLCHAIN} + -DBUILD_SHARED_LIBS=OFF + -DBUILD_STATIC_LIBS=ON + -DBUILD_TESTING=OFF + -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} + -DCMAKE_CXX_FLAGS=${EP_CXX_FLAGS} + -DCMAKE_C_FLAGS=${EP_C_FLAGS} + -DCMAKE_INSTALL_LIBDIR=lib) + +if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.30") + list(APPEND EP_COMMON_CMAKE_ARGS -DCMAKE_POLICY_VERSION_MINIMUM=3.5) +endif() + +# Accepted values for a dependency source setting. +set(PAIMON_DEPENDENCY_SOURCE_VALUES AUTO BUNDLED SYSTEM) + +# paimon_validate_dependency_source(<value> <option-name>): aborts configuration with +# FATAL_ERROR unless <value>, compared case-insensitively, is one of +# PAIMON_DEPENDENCY_SOURCE_VALUES. <option-name> is only used in the error message. +function(paimon_validate_dependency_source SOURCE_VALUE OPTION_NAME) + string(TOUPPER "${SOURCE_VALUE}" _source) + list(FIND PAIMON_DEPENDENCY_SOURCE_VALUES "${_source}" _source_index) + if(_source_index EQUAL -1) + message(FATAL_ERROR "${OPTION_NAME} got invalid value '${SOURCE_VALUE}'. 
" + "Allowed values: AUTO, BUNDLED, SYSTEM.") + endif() +endfunction() + +# paimon_get_dependency_source(<name> <out-var>): returns the upper-cased, validated +# source for <name>. Lookup order: the <name>_SOURCE variable, then the global property +# PAIMON_<name>_DERIVED_SOURCE, then PAIMON_DEPENDENCY_SOURCE. +function(paimon_get_dependency_source DEPENDENCY_NAME OUT_VAR) + set(_source_option_name "${DEPENDENCY_NAME}_SOURCE") + set(_source "${${DEPENDENCY_NAME}_SOURCE}") + if("${_source}" STREQUAL "") + get_property(_source GLOBAL PROPERTY "PAIMON_${DEPENDENCY_NAME}_DERIVED_SOURCE") + if("${_source}" STREQUAL "") + set(_source "${PAIMON_DEPENDENCY_SOURCE}") + set(_source_option_name "PAIMON_DEPENDENCY_SOURCE") + else() + set(_source_option_name "derived ${DEPENDENCY_NAME}_SOURCE") + endif() + endif() + string(TOUPPER "${_source}" _source) + paimon_validate_dependency_source("${_source}" "${_source_option_name}") + set(${OUT_VAR} + "${_source}" + PARENT_SCOPE) +endfunction() + +# paimon_set_dependency_source_default(<name> <value> <reason>): when <name>_SOURCE is +# empty, stores the validated <value> in the PAIMON_<name>_DERIVED_SOURCE global +# property and logs <reason>; does nothing when <name>_SOURCE is set. +function(paimon_set_dependency_source_default DEPENDENCY_NAME SOURCE_VALUE REASON) + if("${${DEPENDENCY_NAME}_SOURCE}" STREQUAL "") + string(TOUPPER "${SOURCE_VALUE}" _source) + paimon_validate_dependency_source("${_source}" + "derived ${DEPENDENCY_NAME}_SOURCE") + set_property(GLOBAL PROPERTY "PAIMON_${DEPENDENCY_NAME}_DERIVED_SOURCE" + "${_source}") + message(STATUS "Defaulting ${DEPENDENCY_NAME}_SOURCE to ${_source}: ${REASON}") + endif() +endfunction() + +# Derives default sources for zstd, Snappy, LZ4, ZLIB and RE2 from Arrow's source, and +# for Protobuf from ORC's source when PAIMON_ENABLE_ORC is set. With AUTO, a quiet +# find_package(<X>Alt MODULE) probe chooses between SYSTEM and BUNDLED. +function(paimon_apply_dependency_source_defaults) + paimon_get_dependency_source(Arrow _arrow_source) + if(_arrow_source STREQUAL "SYSTEM" OR _arrow_source STREQUAL "BUNDLED") + foreach(_dependency + zstd + Snappy + LZ4 + ZLIB + RE2) + paimon_set_dependency_source_default( + ${_dependency} ${_arrow_source} + "follow Arrow_SOURCE to avoid mixed transitive dependencies") + endforeach() + elseif(_arrow_source STREQUAL "AUTO") + paimon_configure_dependency_root(Arrow "${_arrow_source}" _arrow_resolved_source) + find_package(ArrowAlt QUIET MODULE) + if(ArrowAlt_FOUND) + set(_arrow_dependency_default SYSTEM) + set(_arrow_dependency_reason + "system Arrow found during AUTO dependency precheck") + else() + set(_arrow_dependency_default BUNDLED) + set(_arrow_dependency_reason +
"system Arrow not found during AUTO dependency precheck") + endif() + foreach(_dependency + zstd + Snappy + LZ4 + ZLIB + RE2) + paimon_set_dependency_source_default( + ${_dependency} ${_arrow_dependency_default} "${_arrow_dependency_reason}") + endforeach() + endif() + + if(PAIMON_ENABLE_ORC) + paimon_get_dependency_source(ORC _orc_source) + if(_orc_source STREQUAL "SYSTEM" OR _orc_source STREQUAL "BUNDLED") + paimon_set_dependency_source_default( + Protobuf ${_orc_source} + "follow ORC_SOURCE to avoid mixed transitive dependencies") + elseif(_orc_source STREQUAL "AUTO") + paimon_configure_dependency_root(ORC "${_orc_source}" _orc_resolved_source) + find_package(ORCAlt QUIET MODULE) + if(ORCAlt_FOUND) + paimon_set_dependency_source_default( + Protobuf SYSTEM "system ORC found during AUTO dependency precheck") + else() + paimon_set_dependency_source_default( + Protobuf BUNDLED + "system ORC not found during AUTO dependency precheck") + endif() + endif() + endif() +endfunction() + +# paimon_configure_dependency_root(<name> <source> <out-var>): currently a pass-through +# that copies <source> into <out-var> in the caller's scope; <name> is not used. +function(paimon_configure_dependency_root DEPENDENCY_NAME SOURCE_VALUE OUT_SOURCE) + set(${OUT_SOURCE} + "${SOURCE_VALUE}" + PARENT_SCOPE) +endfunction() + +# paimon_get_dependency_root(<name> <out-var>): returns <name>_ROOT when it is defined +# and non-empty, otherwise PAIMON_PACKAGE_PREFIX when non-empty, otherwise the literal +# text "<default search paths>". +function(paimon_get_dependency_root DEPENDENCY_NAME OUT_VAR) + set(_root_var "${DEPENDENCY_NAME}_ROOT") + if(DEFINED ${_root_var} AND NOT "${${_root_var}}" STREQUAL "") + set(${OUT_VAR} + "${${_root_var}}" + PARENT_SCOPE) + elseif(NOT "${PAIMON_PACKAGE_PREFIX}" STREQUAL "") + set(${OUT_VAR} + "${PAIMON_PACKAGE_PREFIX}" + PARENT_SCOPE) + else() + set(${OUT_VAR} + "<default search paths>" + PARENT_SCOPE) + endif() +endfunction() + +# paimon_get_dependency_compat_target(<name> <out-var>): maps a dependency name to a +# target name; names without an explicit mapping are returned unchanged. +function(paimon_get_dependency_compat_target DEPENDENCY_NAME OUT_VAR) + if("${DEPENDENCY_NAME}" STREQUAL "Arrow") + set(_target arrow) + elseif("${DEPENDENCY_NAME}" STREQUAL "ORC") + set(_target orc::orc) + elseif("${DEPENDENCY_NAME}" STREQUAL "Protobuf") + set(_target libprotobuf) + elseif("${DEPENDENCY_NAME}" STREQUAL "GTest") + set(_target GTest::gtest) + elseif("${DEPENDENCY_NAME}" STREQUAL "RE2") +
set(_target re2::re2) + elseif("${DEPENDENCY_NAME}" STREQUAL "Snappy") + set(_target snappy) + elseif("${DEPENDENCY_NAME}" STREQUAL "LZ4") + set(_target lz4) + elseif("${DEPENDENCY_NAME}" STREQUAL "ZLIB") + set(_target zlib) + elseif("${DEPENDENCY_NAME}" STREQUAL "TBB") + set(_target tbb) + elseif("${DEPENDENCY_NAME}" STREQUAL "Avro") + set(_target avro) + else() + set(_target "${DEPENDENCY_NAME}") + endif() + + set(${OUT_VAR} + "${_target}" + PARENT_SCOPE) +endfunction() + +# Records how a dependency was resolved: adds DEPENDENCY_NAME to the global property +# PAIMON_RESOLVED_DEPENDENCIES (without duplicates) and stores the requested source, +# actual source, search root and target name in per-dependency global properties. +function(paimon_record_dependency_resolution + DEPENDENCY_NAME + REQUESTED_SOURCE + ACTUAL_SOURCE + TARGET_NAME) + get_property(_dependencies GLOBAL PROPERTY PAIMON_RESOLVED_DEPENDENCIES) + list(APPEND _dependencies "${DEPENDENCY_NAME}") + list(REMOVE_DUPLICATES _dependencies) + set_property(GLOBAL PROPERTY PAIMON_RESOLVED_DEPENDENCIES "${_dependencies}") + + paimon_get_dependency_root("${DEPENDENCY_NAME}" _root) + set_property(GLOBAL PROPERTY "PAIMON_${DEPENDENCY_NAME}_REQUESTED_SOURCE" + "${REQUESTED_SOURCE}") + set_property(GLOBAL PROPERTY "PAIMON_${DEPENDENCY_NAME}_ACTUAL_SOURCE" + "${ACTUAL_SOURCE}") + set_property(GLOBAL PROPERTY "PAIMON_${DEPENDENCY_NAME}_ROOT" "${_root}") + set_property(GLOBAL PROPERTY "PAIMON_${DEPENDENCY_NAME}_TARGET" "${TARGET_NAME}") +endfunction() + +# Prints one STATUS line per recorded dependency with its requested source, actual +# source, target and root; prints nothing when no dependency has been recorded. +function(paimon_print_dependency_resolution_summary) + get_property(_dependencies GLOBAL PROPERTY PAIMON_RESOLVED_DEPENDENCIES) + if(NOT _dependencies) + return() + endif() + + message(STATUS "Dependency resolution summary:") + foreach(_dependency IN LISTS _dependencies) + get_property(_requested GLOBAL PROPERTY "PAIMON_${_dependency}_REQUESTED_SOURCE") + get_property(_actual GLOBAL PROPERTY "PAIMON_${_dependency}_ACTUAL_SOURCE") + get_property(_root GLOBAL PROPERTY "PAIMON_${_dependency}_ROOT") + get_property(_target GLOBAL PROPERTY "PAIMON_${_dependency}_TARGET") + message(STATUS " ${_dependency}: requested=${_requested}, actual=${_actual}, target=${_target}, root=${_root}" + ) + endforeach() +endfunction() +
+# paimon_build_dependency(<name>): invokes the build_<dep>() macro that matches <name>; +# a name without a bundled build rule aborts configuration with FATAL_ERROR. +macro(paimon_build_dependency DEPENDENCY_NAME) + if("${DEPENDENCY_NAME}" STREQUAL "Arrow") + build_arrow() + elseif("${DEPENDENCY_NAME}" STREQUAL "fmt") + build_fmt() + elseif("${DEPENDENCY_NAME}" STREQUAL "RapidJSON") + build_rapidjson() + elseif("${DEPENDENCY_NAME}" STREQUAL "zstd") + build_zstd() + elseif("${DEPENDENCY_NAME}" STREQUAL "Snappy") + build_snappy() + elseif("${DEPENDENCY_NAME}" STREQUAL "LZ4") + build_lz4() + elseif("${DEPENDENCY_NAME}" STREQUAL "ZLIB") + build_zlib() + elseif("${DEPENDENCY_NAME}" STREQUAL "RE2") + build_re2() + elseif("${DEPENDENCY_NAME}" STREQUAL "Protobuf") + build_protobuf() + elseif("${DEPENDENCY_NAME}" STREQUAL "ORC") + build_orc() + elseif("${DEPENDENCY_NAME}" STREQUAL "TBB") + build_tbb() + elseif("${DEPENDENCY_NAME}" STREQUAL "glog") + build_glog() + elseif("${DEPENDENCY_NAME}" STREQUAL "Avro") + build_avro() + elseif("${DEPENDENCY_NAME}" STREQUAL "GTest") + build_gtest() + else() + message(FATAL_ERROR "No bundled build rule for ${DEPENDENCY_NAME}") + endif() +endmacro() + +# resolve_dependency(<name> [FIND_PACKAGE_NAME <pkg>]): resolves <name> according to its +# source setting (BUNDLED, SYSTEM or AUTO). System lookup uses +# find_package(<pkg>Alt ... MODULE), where <pkg> defaults to <name>; AUTO falls back to +# the bundled build when that lookup does not find the package. The outcome is stored +# in the PAIMON_<name>_ACTUAL_SOURCE cache variable and recorded for the summary. +macro(resolve_dependency DEPENDENCY_NAME) + set(options) + set(one_value_args FIND_PACKAGE_NAME) + set(multi_value_args) + cmake_parse_arguments(ARG + "${options}" + "${one_value_args}" + "${multi_value_args}" + ${ARGN}) + + if(ARG_FIND_PACKAGE_NAME) + set(_paimon_find_package_name "${ARG_FIND_PACKAGE_NAME}") + else() + set(_paimon_find_package_name "${DEPENDENCY_NAME}") + endif() + set(_paimon_alt_package_name "${_paimon_find_package_name}Alt") + set(_paimon_found_var "${_paimon_alt_package_name}_FOUND") + + paimon_get_dependency_source(${DEPENDENCY_NAME} _paimon_requested_source) + paimon_configure_dependency_root(${DEPENDENCY_NAME} "${_paimon_requested_source}" + _paimon_resolved_source) + paimon_get_dependency_compat_target(${DEPENDENCY_NAME} _paimon_target_name) + + if(_paimon_resolved_source STREQUAL "BUNDLED") + message(STATUS "Using bundled ${DEPENDENCY_NAME}") + paimon_build_dependency(${DEPENDENCY_NAME}) +
set(PAIMON_${DEPENDENCY_NAME}_ACTUAL_SOURCE + "BUNDLED" + CACHE INTERNAL "Actual source for ${DEPENDENCY_NAME}") + paimon_record_dependency_resolution( + ${DEPENDENCY_NAME} "${_paimon_requested_source}" "BUNDLED" + "${_paimon_target_name}") + elseif(_paimon_resolved_source STREQUAL "SYSTEM") + message(STATUS "Using system ${DEPENDENCY_NAME}") + find_package(${_paimon_alt_package_name} REQUIRED MODULE) + # This branch is only reached when the resolved source is SYSTEM, so the literal + # value is recorded as the actual source, exactly as the BUNDLED and AUTO branches + # do. The requested source may only coincidentally carry the same value. + set(PAIMON_${DEPENDENCY_NAME}_ACTUAL_SOURCE + "SYSTEM" + CACHE INTERNAL "Actual source for ${DEPENDENCY_NAME}") + paimon_record_dependency_resolution( + ${DEPENDENCY_NAME} "${_paimon_requested_source}" + "SYSTEM" "${_paimon_target_name}") + elseif(_paimon_resolved_source STREQUAL "AUTO") + message(STATUS "Resolving ${DEPENDENCY_NAME} with AUTO source") + find_package(${_paimon_alt_package_name} QUIET MODULE) + if(${_paimon_found_var}) + message(STATUS "Using system ${DEPENDENCY_NAME}") + set(PAIMON_${DEPENDENCY_NAME}_ACTUAL_SOURCE + "SYSTEM" + CACHE INTERNAL "Actual source for ${DEPENDENCY_NAME}") + paimon_record_dependency_resolution( + ${DEPENDENCY_NAME} "${_paimon_requested_source}" "SYSTEM" + "${_paimon_target_name}") + else() + message(STATUS "System ${DEPENDENCY_NAME} not found; using bundled") + paimon_build_dependency(${DEPENDENCY_NAME}) + set(PAIMON_${DEPENDENCY_NAME}_ACTUAL_SOURCE + "BUNDLED" + CACHE INTERNAL "Actual source for ${DEPENDENCY_NAME}") + paimon_record_dependency_resolution( + ${DEPENDENCY_NAME} "${_paimon_requested_source}" "BUNDLED" + "${_paimon_target_name}") + endif() + else() + message(FATAL_ERROR "Unsupported source ${_paimon_resolved_source} " + "for ${DEPENDENCY_NAME}") + endif() + + # This is a macro, so its helper variables live in the caller's scope; clear them. + unset(_paimon_find_package_name) + unset(_paimon_alt_package_name) + unset(_paimon_found_var) + unset(_paimon_requested_source) + unset(_paimon_resolved_source) + unset(_paimon_target_name) +endmacro() + +# Warns for each of zstd, Snappy, LZ4, ZLIB and RE2 whose recorded actual source differs +# from Arrow's; does nothing when Arrow's actual source has not been recorded. +function(paimon_warn_if_mixed_arrow_dependencies) + if(NOT DEFINED PAIMON_Arrow_ACTUAL_SOURCE) + return() +
endif() + + foreach(_dependency + zstd + Snappy + LZ4 + ZLIB + RE2) + if(DEFINED PAIMON_${_dependency}_ACTUAL_SOURCE + AND NOT "${PAIMON_${_dependency}_ACTUAL_SOURCE}" STREQUAL + "${PAIMON_Arrow_ACTUAL_SOURCE}") + message(WARNING "Arrow resolved from ${PAIMON_Arrow_ACTUAL_SOURCE}, but " + "${_dependency} resolved from " + "${PAIMON_${_dependency}_ACTUAL_SOURCE}. Mixing SYSTEM " + "and BUNDLED dependencies can cause ABI conflicts.") + endif() + endforeach() +endfunction() + +# build_lucene(): builds Lucene++ as the external project lucene_ep, installed under +# lucene_ep-install, and exposes the static library as the imported target `lucene`. +# The project depends on the `zlib` target and the boost_* imported targets, and reads +# BOOST_INCLUDE_DIR, BOOST_LIBRARY_DIR, BOOST_INSTALL, ZLIB_INCLUDE_DIR and +# ZLIB_LIBRARIES, so those must be set before this macro is invoked. +macro(build_lucene) + message(STATUS "Building lucene from source") + + get_target_property(LUCENE_ZLIB_INCLUDE_DIR zlib INTERFACE_INCLUDE_DIRECTORIES) + get_filename_component(LUCENE_ZLIB_ROOT "${LUCENE_ZLIB_INCLUDE_DIR}" DIRECTORY) + + set(LUCENE_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/lucene_ep-install") + + set(LUCENE_CMAKE_CXX_FLAGS "-pthread") + if(PAIMON_USE_CXX11_ABI) + string(APPEND LUCENE_CMAKE_CXX_FLAGS " -D_GLIBCXX_USE_CXX11_ABI=1") + else() + string(APPEND LUCENE_CMAKE_CXX_FLAGS " -D_GLIBCXX_USE_CXX11_ABI=0") + endif() + + # The C/CXX flag entries below come after EP_COMMON_CMAKE_ARGS and therefore replace + # the common EP_C_FLAGS / EP_CXX_FLAGS for this project. + set(LUCENE_CMAKE_ARGS + ${EP_COMMON_CMAKE_ARGS} + "-DLUCENE_BUILD_SHARED=OFF" + "-DENABLE_TEST=OFF" + "-DCMAKE_C_FLAGS=-pthread" + "-DCMAKE_CXX_FLAGS=${LUCENE_CMAKE_CXX_FLAGS}" + "-DCMAKE_EXE_LINKER_FLAGS=-pthread" + "-DBoost_NO_BOOST_CMAKE=ON" + "-DBoost_NO_SYSTEM_PATHS=ON" + "-DBoost_USE_STATIC_LIBS=ON" + "-DBoost_INCLUDE_DIR=${BOOST_INCLUDE_DIR}" + "-DBoost_LIBRARY_DIR=${BOOST_LIBRARY_DIR}" + "-DBOOST_ROOT=${BOOST_INSTALL}" + "-DBoost_CHRONO_FOUND=TRUE" + "-DBoost_THREAD_FOUND=TRUE" + "-DZLIB_INCLUDE_DIRS=${ZLIB_INCLUDE_DIR}" + "-DZLIB_LIBRARY_RELEASE=${ZLIB_LIBRARIES}" + "-DZLIB_ROOT=${LUCENE_ZLIB_ROOT}" + "-DCMAKE_INSTALL_PREFIX=${LUCENE_PREFIX}") + + set(LUCENE_LIB "${LUCENE_PREFIX}/lib/liblucene++.a") + externalproject_add(lucene_ep + ${EP_COMMON_OPTIONS} + URL ${LUCENE_SOURCE_URL} + URL_HASH "SHA256=${PAIMON_LUCENE_BUILD_SHA256_CHECKSUM}" + CMAKE_ARGS ${LUCENE_CMAKE_ARGS} + BUILD_BYPRODUCTS ${LUCENE_LIB} + DEPENDS zlib + boost_date_time + boost_filesystem +
boost_regex + boost_thread + boost_iostreams + boost_system + boost_chrono + boost_atomic) + + set(LUCENE_INCLUDE_DIR "${LUCENE_PREFIX}/include") + # The include directory must exist before it is referenced by a target. + file(MAKE_DIRECTORY "${LUCENE_INCLUDE_DIR}") + include_directories(SYSTEM ${LUCENE_INCLUDE_DIR} ${BOOST_INCLUDE_DIR}) + add_library(lucene STATIC IMPORTED) + set_target_properties(lucene + PROPERTIES IMPORTED_LOCATION "${LUCENE_LIB}" + INTERFACE_INCLUDE_DIRECTORIES + "${LUCENE_INCLUDE_DIR}") + + target_link_libraries(lucene + INTERFACE zlib + boost_date_time + boost_filesystem + boost_regex + boost_thread + boost_iostreams + boost_system + boost_chrono + boost_atomic + pthread + dl) + add_dependencies(lucene lucene_ep) +endmacro() + +# build_jieba(): fetches limonp (limonp_ep, install step disabled) and cppjieba +# (jieba_ep, patched with jieba.diff from this directory). jieba_ep's install command +# copies the jieba headers, the jieba dictionaries and the limonp headers into +# jieba_ep-install; the result is exposed as the INTERFACE IMPORTED target `jieba`. +macro(build_jieba) + message(STATUS "Building limonp from source") + set(LIMONP_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/limonp_ep-prefix") + externalproject_add(limonp_ep + URL ${LIMONP_SOURCE_URL} + URL_HASH "SHA256=${PAIMON_LIMONP_BUILD_SHA256_CHECKSUM}" + INSTALL_COMMAND "") + + message(STATUS "Building jieba from source") + set(JIEBA_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/jieba_ep-prefix") + set(JIEBA_INSTALL "${CMAKE_CURRENT_BINARY_DIR}/jieba_ep-install") + set(JIEBA_INCLUDE_DIR "${JIEBA_INSTALL}/include") + set(JIEBA_DICT_DIR "${JIEBA_INSTALL}/dict") + file(MAKE_DIRECTORY ${JIEBA_INCLUDE_DIR}) + file(MAKE_DIRECTORY ${JIEBA_DICT_DIR}) + + set(JIEBA_CMAKE_ARGS + ${EP_COMMON_CMAKE_ARGS} "-DENABLE_TEST=OFF" "-DCPPJIEBA_TOP_LEVEL_PROJECT=OFF" + "-DCMAKE_INSTALL_PREFIX=${JIEBA_INSTALL}") + + # The patch step drops a .patched marker file in the source directory so that a + # re-run does not try to apply the patch a second time. + set(PATCH_FILE "${CMAKE_CURRENT_LIST_DIR}/jieba.diff") + externalproject_add(jieba_ep + ${EP_COMMON_OPTIONS} + URL ${JIEBA_SOURCE_URL} + URL_HASH "SHA256=${PAIMON_JIEBA_BUILD_SHA256_CHECKSUM}" + CMAKE_ARGS ${JIEBA_CMAKE_ARGS} + LOG_PATCH ON + PATCH_COMMAND ${CMAKE_COMMAND} -E chdir <SOURCE_DIR> bash -c + "[ -f .patched ] && echo '<SOURCE_DIR> patch already applied, ignore...' 
|| patch -s -N -p1 -i '${PATCH_FILE}' && touch .patched" + INSTALL_COMMAND bash -c + "cp -r ${JIEBA_PREFIX}/src/jieba_ep/include/* ${JIEBA_INSTALL}/include/ && cp -r ${JIEBA_PREFIX}/src/jieba_ep/dict/* ${JIEBA_INSTALL}/dict/ && cp -r ${LIMONP_PREFIX}/src/limonp_ep/include/* ${JIEBA_INSTALL}/include/" + ) + + # The include directory must exist before it is referenced by a target. + include_directories(SYSTEM ${JIEBA_INCLUDE_DIR} ${JIEBA_DICT_DIR}) + add_library(jieba INTERFACE IMPORTED) + target_include_directories(jieba SYSTEM INTERFACE ${JIEBA_INCLUDE_DIR} + ${JIEBA_DICT_DIR}) + # jieba_ep's install step copies limonp's headers, so limonp_ep must finish first. + add_dependencies(jieba_ep limonp_ep) + add_dependencies(jieba jieba_ep) +endmacro() + +# build_rapidjson(): builds RapidJSON as the external project rapidjson_ep with docs, +# examples and tests disabled, installs it under rapidjson_ep-install and exposes its +# include directory through the INTERFACE IMPORTED target `RapidJSON`. +macro(build_rapidjson) + message(STATUS "Building RapidJSON from source") + set(RAPIDJSON_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/rapidjson_ep-install") + set(RAPIDJSON_CMAKE_ARGS + ${EP_COMMON_CMAKE_ARGS} + -DRAPIDJSON_BUILD_DOC=OFF + -DRAPIDJSON_BUILD_EXAMPLES=OFF + -DRAPIDJSON_BUILD_TESTS=OFF + "-DCMAKE_INSTALL_PREFIX=${RAPIDJSON_PREFIX}") + + externalproject_add(rapidjson_ep + ${EP_COMMON_OPTIONS} + URL ${RAPIDJSON_SOURCE_URL} + URL_HASH "SHA256=${PAIMON_RAPIDJSON_BUILD_SHA256_CHECKSUM}" + CMAKE_ARGS ${RAPIDJSON_CMAKE_ARGS}) + + set(RAPIDJSON_INCLUDE_DIR "${RAPIDJSON_PREFIX}/include") + # The include directory must exist before it is referenced by a target.
+ file(MAKE_DIRECTORY "${RAPIDJSON_INCLUDE_DIR}") + + include_directories(SYSTEM ${RAPIDJSON_INCLUDE_DIR}) + add_library(RapidJSON INTERFACE IMPORTED) + target_include_directories(RapidJSON INTERFACE "${RAPIDJSON_INCLUDE_DIR}") + add_dependencies(RapidJSON rapidjson_ep) +endmacro() + +# build_fmt(): builds fmt as the external project fmt_ep, installed under +# fmt_ep-install, and exposes the static library as the imported target `fmt`. +# The library file name carries a "d" suffix when the build type is DEBUG. +macro(build_fmt) + message(STATUS "Building fmt from source") + set(FMT_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/fmt_ep-install") + set(FMT_INCLUDE_DIR "${FMT_PREFIX}/include") + # Both operands are quoted: an empty UPPERCASE_BUILD_TYPE would otherwise make the + # if() malformed, and an unquoted value would be dereferenced if a variable of that + # name happened to exist. + if("${UPPERCASE_BUILD_TYPE}" STREQUAL "DEBUG") + set(FMT_LIB_SUFFIX "d") + else() + set(FMT_LIB_SUFFIX "") + endif() + set(FMT_STATIC_LIB_NAME fmt) + set(FMT_STATIC_LIB + "${FMT_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}${FMT_STATIC_LIB_NAME}${FMT_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) + set(FMT_LIBRARIES ${FMT_STATIC_LIB}) + set(FMT_CMAKE_CXX_FLAGS "${EP_CXX_FLAGS} -Wno-error") + set(FMT_CMAKE_C_FLAGS "${EP_C_FLAGS} -Wno-error") + string(REPLACE "-Werror" "" FMT_CMAKE_CXX_FLAGS "${FMT_CMAKE_CXX_FLAGS}") + + # The flag entries follow EP_COMMON_CMAKE_ARGS so that they override the common ones. + set(FMT_CMAKE_ARGS + ${EP_COMMON_CMAKE_ARGS} + -DCMAKE_INSTALL_PREFIX=${FMT_PREFIX} + "-DCMAKE_CXX_FLAGS=${FMT_CMAKE_CXX_FLAGS}" + "-DCMAKE_C_FLAGS=${FMT_CMAKE_C_FLAGS}" + -DFMT_TEST=OFF + -DFMT_DOC=OFF) + set(FMT_CONFIGURE CMAKE_ARGS ${FMT_CMAKE_ARGS}) + externalproject_add(fmt_ep + URL ${FMT_SOURCE_URL} + URL_HASH "SHA256=${PAIMON_FMT_BUILD_SHA256_CHECKSUM}" + ${FMT_CONFIGURE} ${THIRDPARTY_LOG_OPTIONS} + BUILD_BYPRODUCTS ${FMT_STATIC_LIB}) + + # The include directory must exist before it is referenced by a target. + file(MAKE_DIRECTORY "${FMT_INCLUDE_DIR}") + + include_directories(SYSTEM ${FMT_INCLUDE_DIR}) + add_library(fmt STATIC IMPORTED) + set_target_properties(fmt PROPERTIES IMPORTED_LOCATION ${FMT_STATIC_LIB}) + target_include_directories(fmt INTERFACE ${FMT_INCLUDE_DIR}) + add_dependencies(fmt fmt_ep) +endmacro(build_fmt) + +macro(build_boost) + message(STATUS "Building boost from source") + set(BOOST_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/boost_ep-prefix") + set(BOOST_INSTALL "${CMAKE_CURRENT_BINARY_DIR}/boost_ep-install") + set(BOOST_INCLUDE_DIR
"${BOOST_INSTALL}/include") + set(BOOST_LIBRARY_DIR ${BOOST_INSTALL}/lib) + file(MAKE_DIRECTORY ${BOOST_INCLUDE_DIR}) + file(MAKE_DIRECTORY ${BOOST_LIBRARY_DIR}) + + # Static libraries produced by boost_ep; one imported target is declared for each. + set(BOOST_BYPRODUCTS + ${BOOST_LIBRARY_DIR}/libboost_date_time.a + ${BOOST_LIBRARY_DIR}/libboost_filesystem.a + ${BOOST_LIBRARY_DIR}/libboost_system.a + ${BOOST_LIBRARY_DIR}/libboost_regex.a + ${BOOST_LIBRARY_DIR}/libboost_thread.a + ${BOOST_LIBRARY_DIR}/libboost_atomic.a + ${BOOST_LIBRARY_DIR}/libboost_chrono.a + ${BOOST_LIBRARY_DIR}/libboost_iostreams.a) + + set(BOOST_CXX_FLAGS "-fPIC") + if(PAIMON_USE_CXX11_ABI) + string(APPEND BOOST_CXX_FLAGS " -D_GLIBCXX_USE_CXX11_ABI=1") + else() + string(APPEND BOOST_CXX_FLAGS " -D_GLIBCXX_USE_CXX11_ABI=0") + endif() + + # Boost is taken only from the package file under THIRDPARTY_DIR/boost (no download + # URL is used here). bootstrap.sh and b2 run inside the source tree, b2 is pointed at + # ZLIB_INCLUDE_DIR and ZLIB_PREFIX/lib, and the project depends on the `zlib` target. + externalproject_add(boost_ep + URL "${THIRDPARTY_DIR}/boost/${PAIMON_BOOST_PKG_NAME}" + URL_HASH "SHA256=${PAIMON_BOOST_BUILD_SHA256_CHECKSUM}" + CONFIGURE_COMMAND ${BOOST_PREFIX}/src/boost_ep/bootstrap.sh + --with-libraries=date_time,filesystem,iostreams,regex,system,thread,chrono,atomic + BUILD_IN_SOURCE TRUE + BUILD_COMMAND ${BOOST_PREFIX}/src/boost_ep/b2 + --prefix=${BOOST_INSTALL} + --libdir=${BOOST_LIBRARY_DIR} link=static + -sZLIB_INCLUDE=${ZLIB_INCLUDE_DIR} + -sZLIB_LIBRARY_PATH=${ZLIB_PREFIX}/lib + runtime-link=shared threading=multi variant=release + cxxflags=${BOOST_CXX_FLAGS} install + INSTALL_COMMAND bash -c + "mkdir -p ${BOOST_INSTALL}/include/boost && cp -r ${BOOST_PREFIX}/src/boost_ep/libs/*/include/boost/* ${BOOST_INSTALL}/include/boost && cp -r ${BOOST_PREFIX}/src/boost_ep/libs/*/*/include/boost/* ${BOOST_INSTALL}/include/boost" + DEPENDS zlib + BUILD_BYPRODUCTS ${BOOST_BYPRODUCTS} + LOG_DOWNLOAD ON + LOG_CONFIGURE ON + LOG_BUILD ON) + + include_directories(SYSTEM ${BOOST_INCLUDE_DIR}) + + add_library(boost_atomic STATIC IMPORTED) + set_target_properties(boost_atomic + PROPERTIES IMPORTED_LOCATION + ${BOOST_LIBRARY_DIR}/libboost_atomic.a + INTERFACE_INCLUDE_DIRECTORIES ${BOOST_INCLUDE_DIR}) + add_library(boost_chrono STATIC
IMPORTED) + set_target_properties(boost_chrono + PROPERTIES IMPORTED_LOCATION + ${BOOST_LIBRARY_DIR}/libboost_chrono.a + INTERFACE_INCLUDE_DIRECTORIES ${BOOST_INCLUDE_DIR}) + add_library(boost_date_time STATIC IMPORTED) + set_target_properties(boost_date_time + PROPERTIES IMPORTED_LOCATION + ${BOOST_LIBRARY_DIR}/libboost_date_time.a + INTERFACE_INCLUDE_DIRECTORIES ${BOOST_INCLUDE_DIR}) + add_library(boost_filesystem STATIC IMPORTED) + set_target_properties(boost_filesystem + PROPERTIES IMPORTED_LOCATION + ${BOOST_LIBRARY_DIR}/libboost_filesystem.a + INTERFACE_INCLUDE_DIRECTORIES ${BOOST_INCLUDE_DIR}) + add_library(boost_regex STATIC IMPORTED) + set_target_properties(boost_regex + PROPERTIES IMPORTED_LOCATION + ${BOOST_LIBRARY_DIR}/libboost_regex.a + INTERFACE_INCLUDE_DIRECTORIES ${BOOST_INCLUDE_DIR}) + add_library(boost_thread STATIC IMPORTED) + set_target_properties(boost_thread + PROPERTIES IMPORTED_LOCATION + ${BOOST_LIBRARY_DIR}/libboost_thread.a + INTERFACE_INCLUDE_DIRECTORIES ${BOOST_INCLUDE_DIR}) + add_library(boost_iostreams STATIC IMPORTED) + set_target_properties(boost_iostreams + PROPERTIES IMPORTED_LOCATION + ${BOOST_LIBRARY_DIR}/libboost_iostreams.a + INTERFACE_INCLUDE_DIRECTORIES ${BOOST_INCLUDE_DIR}) + add_library(boost_system STATIC IMPORTED) + set_target_properties(boost_system + PROPERTIES IMPORTED_LOCATION + ${BOOST_LIBRARY_DIR}/libboost_system.a + INTERFACE_INCLUDE_DIRECTORIES ${BOOST_INCLUDE_DIR}) + + add_dependencies(boost_atomic boost_ep) + add_dependencies(boost_chrono boost_ep) + add_dependencies(boost_date_time boost_ep) + add_dependencies(boost_filesystem boost_ep) + add_dependencies(boost_regex boost_ep) + add_dependencies(boost_thread boost_ep) + add_dependencies(boost_iostreams boost_ep) + add_dependencies(boost_system boost_ep) +endmacro(build_boost) + +macro(build_re2) + message(STATUS "Building RE2 from source") + set(RE2_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/re2_ep-install") + set(RE2_INCLUDE_DIR "${RE2_PREFIX}/include") + 
set(RE2_STATIC_LIB + "${RE2_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}re2${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) + set(RE2_LIBRARIES ${RE2_STATIC_LIB}) + + set(RE2_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} "-DCMAKE_INSTALL_PREFIX=${RE2_PREFIX}") + + externalproject_add(re2_ep + ${EP_COMMON_OPTIONS} + INSTALL_DIR ${RE2_PREFIX} + URL ${RE2_SOURCE_URL} + URL_HASH "SHA256=${PAIMON_RE2_BUILD_SHA256_CHECKSUM}" + CMAKE_ARGS ${RE2_CMAKE_ARGS} ${THIRDPARTY_LOG_OPTIONS} + BUILD_BYPRODUCTS "${RE2_STATIC_LIB}") + + file(MAKE_DIRECTORY "${RE2_INCLUDE_DIR}") + + include_directories(SYSTEM ${RE2_INCLUDE_DIR}) + add_library(re2::re2 STATIC IMPORTED) + set_target_properties(re2::re2 PROPERTIES IMPORTED_LOCATION "${RE2_STATIC_LIB}") + target_include_directories(re2::re2 INTERFACE "${RE2_INCLUDE_DIR}") + add_dependencies(re2::re2 re2_ep) +endmacro() + +macro(build_snappy) + message(STATUS "Building snappy from source") + set(SNAPPY_HOME "${CMAKE_CURRENT_BINARY_DIR}/snappy_ep-install") + set(SNAPPY_INCLUDE_DIR "${SNAPPY_HOME}/include") + set(SNAPPY_STATIC_LIB + "${SNAPPY_HOME}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}snappy${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) + set(SNAPPY_LIBRARIES ${SNAPPY_STATIC_LIB}) + set(SNAPPY_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} -DCMAKE_INSTALL_PREFIX=${SNAPPY_HOME} + -DSNAPPY_BUILD_TESTS=OFF -DSNAPPY_BUILD_BENCHMARKS=OFF) + + externalproject_add(snappy_ep + URL ${SNAPPY_SOURCE_URL} + URL_HASH "SHA256=${PAIMON_SNAPPY_BUILD_SHA256_CHECKSUM}" + CMAKE_ARGS ${SNAPPY_CMAKE_ARGS} ${THIRDPARTY_LOG_OPTIONS} + BUILD_BYPRODUCTS "${SNAPPY_STATIC_LIB}") + + file(MAKE_DIRECTORY "${SNAPPY_INCLUDE_DIR}") + + include_directories(SYSTEM ${SNAPPY_INCLUDE_DIR}) + add_library(snappy STATIC IMPORTED) + set_target_properties(snappy PROPERTIES IMPORTED_LOCATION ${SNAPPY_STATIC_LIB}) + target_include_directories(snappy INTERFACE ${SNAPPY_INCLUDE_DIR}) + add_dependencies(snappy snappy_ep) +endmacro() + +macro(build_zlib) + message(STATUS "Building zlib from source") + set(ZLIB_PREFIX 
"${CMAKE_CURRENT_BINARY_DIR}/zlib_ep-install") + set(ZLIB_INCLUDE_DIR "${ZLIB_PREFIX}/include") + set(ZLIB_STATIC_LIB_NAME z) + set(ZLIB_STATIC_LIB + "${ZLIB_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}${ZLIB_STATIC_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) + set(ZLIB_LIBRARIES ${ZLIB_STATIC_LIB}) + set(ZLIB_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} -DCMAKE_INSTALL_PREFIX=${ZLIB_PREFIX}) + + externalproject_add(zlib_ep + URL ${ZLIB_SOURCE_URL} + URL_HASH "SHA256=${PAIMON_ZLIB_BUILD_SHA256_CHECKSUM}" + CMAKE_ARGS ${ZLIB_CMAKE_ARGS} ${THIRDPARTY_LOG_OPTIONS} + BUILD_BYPRODUCTS "${ZLIB_STATIC_LIB}") + + file(MAKE_DIRECTORY "${ZLIB_INCLUDE_DIR}") + + include_directories(SYSTEM ${ZLIB_INCLUDE_DIR}) + add_library(zlib STATIC IMPORTED) + set_target_properties(zlib PROPERTIES IMPORTED_LOCATION ${ZLIB_STATIC_LIB}) + target_include_directories(zlib INTERFACE ${ZLIB_INCLUDE_DIR}) + add_dependencies(zlib zlib_ep) +endmacro() + +macro(build_zstd) + message(STATUS "Building zstd from source") + set(ZSTD_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/zstd_ep-install") + set(ZSTD_INCLUDE_DIR "${ZSTD_PREFIX}/include") + set(ZSTD_STATIC_LIB_NAME zstd) + set(ZSTD_STATIC_LIB + "${ZSTD_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}${ZSTD_STATIC_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) + set(ZSTD_LIBRARIES ${ZSTD_STATIC_LIB}) + set(ZSTD_CMAKE_CXX_FLAGS "${EP_CXX_FLAGS} -Wno-error") + set(ZSTD_CMAKE_C_FLAGS "${EP_C_FLAGS} -Wno-error") + string(REPLACE "-Werror" "" ZSTD_CMAKE_CXX_FLAGS ${ZSTD_CMAKE_CXX_FLAGS}) + + set(ZSTD_CMAKE_ARGS + ${EP_COMMON_CMAKE_ARGS} + -DCMAKE_INSTALL_PREFIX=${ZSTD_PREFIX} + "-DCMAKE_CXX_FLAGS=${ZSTD_CMAKE_CXX_FLAGS}" + "-DCMAKE_C_FLAGS=${ZSTD_CMAKE_C_FLAGS}" + -DZSTD_BUILD_SHARED=OFF + -DZSTD_BUILD_PROGRAMS=OFF) + + set(ZSTD_CONFIGURE SOURCE_SUBDIR "build/cmake" CMAKE_ARGS ${ZSTD_CMAKE_ARGS}) + externalproject_add(zstd_ep + URL ${ZSTD_SOURCE_URL} + URL_HASH "SHA256=${PAIMON_ZSTD_BUILD_SHA256_CHECKSUM}" + ${ZSTD_CONFIGURE} ${THIRDPARTY_LOG_OPTIONS} + BUILD_BYPRODUCTS 
${ZSTD_STATIC_LIB}) + + file(MAKE_DIRECTORY "${ZSTD_INCLUDE_DIR}") + + include_directories(SYSTEM ${ZSTD_INCLUDE_DIR}) + add_library(zstd STATIC IMPORTED) + set_target_properties(zstd PROPERTIES IMPORTED_LOCATION ${ZSTD_STATIC_LIB}) + target_include_directories(zstd INTERFACE ${ZSTD_INCLUDE_DIR}) + add_dependencies(zstd zstd_ep) +endmacro(build_zstd) + +macro(build_lz4) + message(STATUS "Building lz4 from source") + set(LZ4_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/lz4_ep-install") + set(LZ4_INCLUDE_DIR "${LZ4_PREFIX}/include") + set(LZ4_STATIC_LIB + "${LZ4_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}lz4${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) + set(LZ4_LIBRARIES ${LZ4_STATIC_LIB}) + set(LZ4_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} -DCMAKE_INSTALL_PREFIX=${LZ4_PREFIX} + -DLZ4_BUILD_CLI=OFF -DLZ4_BUILD_LEGACY_LZ4C=OFF) + + set(LZ4_CONFIGURE SOURCE_SUBDIR "build/cmake" CMAKE_ARGS ${LZ4_CMAKE_ARGS}) + externalproject_add(lz4_ep + URL ${LZ4_SOURCE_URL} + URL_HASH "SHA256=${PAIMON_LZ4_BUILD_SHA256_CHECKSUM}" + ${LZ4_CONFIGURE} ${THIRDPARTY_LOG_OPTIONS} + BUILD_BYPRODUCTS ${LZ4_STATIC_LIB}) + + file(MAKE_DIRECTORY "${LZ4_INCLUDE_DIR}") + + include_directories(SYSTEM ${LZ4_INCLUDE_DIR}) + add_library(lz4 STATIC IMPORTED) + set_target_properties(lz4 PROPERTIES IMPORTED_LOCATION ${LZ4_STATIC_LIB}) + target_include_directories(lz4 INTERFACE ${LZ4_INCLUDE_DIR}) + add_dependencies(lz4 lz4_ep) +endmacro() + +macro(build_jindosdk_c) + message(STATUS "Building jindosdk-c from precompiled package") + + set(JINDOSDK_C_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/jindosdk_ep-install") + set(JINDOSDK_C_HOME "${JINDOSDK_C_PREFIX}") + set(JINDOSDK_C_INCLUDE_DIR "${JINDOSDK_C_PREFIX}/include") + set(JINDOSDK_C_LIB_DIR "${JINDOSDK_C_PREFIX}/lib/native") + set(JINDOSDK_C_DYNAMIC_LIB "${JINDOSDK_C_LIB_DIR}/${JINDOSDK_C_DYNAMIC_LIB_FILE}") + + # Extract and install jindosdk from precompiled package + externalproject_add(jindosdk_ep + URL ${JINDOSDK_C_SOURCE_URL} + URL_HASH "SHA256=${JINDOSDK_C_BUILD_SHA256_CHECKSUM}" 
+ ${THIRDPARTY_LOG_OPTIONS} + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + INSTALL_COMMAND bash -c + "cp -r <SOURCE_DIR>/include/* ${JINDOSDK_C_INCLUDE_DIR}" + COMMAND bash -c + "cp -r <SOURCE_DIR>/lib/native/${JINDOSDK_C_DYNAMIC_LIB_FILE}* ${JINDOSDK_C_LIB_DIR}" + BUILD_BYPRODUCTS "${JINDOSDK_C_DYNAMIC_LIB}") + + # The include directory must exist before it is referenced by a target. + file(MAKE_DIRECTORY "${JINDOSDK_C_INCLUDE_DIR}") + file(MAKE_DIRECTORY "${JINDOSDK_C_LIB_DIR}") + + add_library(jindosdk::c_sdk SHARED IMPORTED) + set_target_properties(jindosdk::c_sdk + PROPERTIES IMPORTED_LOCATION "${JINDOSDK_C_DYNAMIC_LIB}" + INTERFACE_INCLUDE_DIRECTORIES + "${JINDOSDK_C_INCLUDE_DIR}") + list(APPEND JINDOSDK_INCLUDE_DIR ${JINDOSDK_C_INCLUDE_DIR}) + + add_dependencies(jindosdk::c_sdk jindosdk_ep) + install(DIRECTORY "${JINDOSDK_C_LIB_DIR}/" + DESTINATION ${CMAKE_INSTALL_LIBDIR} + FILES_MATCHING + PATTERN "${JINDOSDK_C_DYNAMIC_LIB_FILE}*") + +endmacro() + +macro(build_jindosdk_nextarch) + message(STATUS "Building jindosdk-nextarch from local source") + + set(JINDOSDK_NEXTARCH_PREFIX + "${CMAKE_CURRENT_BINARY_DIR}/jindosdk-nextarch_ep-install") + set(JINDOSDK_NEXTARCH_HOME "${JINDOSDK_NEXTARCH_PREFIX}") + set(JINDOSDK_NEXTARCH_INCLUDE_DIR "${JINDOSDK_NEXTARCH_PREFIX}/include") + set(JINDOSDK_NEXTARCH_LIB_DIR "${JINDOSDK_NEXTARCH_PREFIX}/lib") + set(JINDOSDK_NEXTARCH_SOURCE_DIR "${CMAKE_SOURCE_DIR}/third_party/jindosdk-nextarch") + set(JINDOSDK_NEXTARCH_STATIC_LIB + "${JINDOSDK_NEXTARCH_LIB_DIR}/libjindosdk-nextarch.a") + + # Get jindosdk dependencies (headers and dynamic library) + get_target_property(JINDOSDK_C_INCLUDE_DIR jindosdk::c_sdk + INTERFACE_INCLUDE_DIRECTORIES) + get_target_property(JINDOSDK_C_LIBRARY_LOCATION jindosdk::c_sdk IMPORTED_LOCATION) + get_filename_component(JINDOSDK_C_DIR_ROOT "${JINDOSDK_C_INCLUDE_DIR}" DIRECTORY) + + # Compile flags for jindosdk-nextarch + set(JINDOSDK_NEXTARCH_CMAKE_CXX_FLAGS "${EP_CXX_FLAGS}") + 
set(JINDOSDK_NEXTARCH_CMAKE_C_FLAGS "${EP_C_FLAGS}") + set(JINDOSDK_NEXTARCH_CMAKE_ARGS + ${EP_COMMON_CMAKE_ARGS} + "-DCMAKE_INSTALL_PREFIX=${JINDOSDK_NEXTARCH_PREFIX}" + "-DCMAKE_CXX_FLAGS=${JINDOSDK_NEXTARCH_CMAKE_CXX_FLAGS}" + "-DCMAKE_C_FLAGS=${JINDOSDK_NEXTARCH_CMAKE_C_FLAGS}" + -DJINDOSDK_ROOT=${JINDOSDK_C_DIR_ROOT} + -DJINDOSDK_LIBRARY_NAME=${JINDOSDK_C_DYNAMIC_LIB_NAME}) + + externalproject_add(jindosdk-nextarch_ep + SOURCE_DIR ${JINDOSDK_NEXTARCH_SOURCE_DIR} + CMAKE_ARGS ${JINDOSDK_NEXTARCH_CMAKE_ARGS} + BUILD_BYPRODUCTS "${JINDOSDK_NEXTARCH_STATIC_LIB}" + DEPENDS jindosdk::c_sdk ${THIRDPARTY_LOG_OPTIONS}) + + # The include directory must exist before it is referenced by a target. + file(MAKE_DIRECTORY "${JINDOSDK_NEXTARCH_INCLUDE_DIR}") + file(MAKE_DIRECTORY "${JINDOSDK_NEXTARCH_LIB_DIR}") + + add_library(jindosdk::nextarch STATIC IMPORTED) + set_target_properties(jindosdk::nextarch + PROPERTIES IMPORTED_LOCATION "${JINDOSDK_NEXTARCH_STATIC_LIB}" + INTERFACE_INCLUDE_DIRECTORIES + "${JINDOSDK_NEXTARCH_INCLUDE_DIR}") + target_link_libraries(jindosdk::nextarch INTERFACE jindosdk::c_sdk pthread dl) + list(APPEND JINDOSDK_INCLUDE_DIR ${JINDOSDK_NEXTARCH_INCLUDE_DIR}) + + add_dependencies(jindosdk::nextarch jindosdk-nextarch_ep) +endmacro() + +# Builds protobuf as the external project protobuf_ep and defines the IMPORTED +# targets libprotobuf, libprotoc and protoc that point into its install prefix. +# The zlib target must already exist: its include directory is used to derive ZLIB_ROOT. +macro(build_protobuf) + message(STATUS "Building protobuf from source") + set(PROTOBUF_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/protobuf_ep-install") + set(PROTOBUF_INCLUDE_DIR "${PROTOBUF_PREFIX}/include") + set(PROTOBUF_STATIC_LIB + "${PROTOBUF_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}protobuf${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) + set(PROTOC_STATIC_LIB + "${PROTOBUF_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}protoc${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) + set(PROTOBUF_LIBRARIES ${PROTOBUF_STATIC_LIB}) + set(PROTOBUF_COMPILER "${PROTOBUF_PREFIX}/bin/protoc") + + get_target_property(THIRDPARTY_ZLIB_INCLUDE_DIR zlib INTERFACE_INCLUDE_DIRECTORIES) + get_filename_component(THIRDPARTY_ZLIB_ROOT "${THIRDPARTY_ZLIB_INCLUDE_DIR}" +
DIRECTORY) + + # Strip lto flags (which may be added by dh_auto_configure) + # See https://github.com/protocolbuffers/protobuf/issues/7092 + set(PROTOBUF_C_FLAGS ${EP_C_FLAGS}) + set(PROTOBUF_CXX_FLAGS ${EP_CXX_FLAGS}) + string(REPLACE "-flto=auto" "" PROTOBUF_C_FLAGS "${PROTOBUF_C_FLAGS}") + string(REPLACE "-ffat-lto-objects" "" PROTOBUF_C_FLAGS "${PROTOBUF_C_FLAGS}") + string(REPLACE "-flto=auto" "" PROTOBUF_CXX_FLAGS "${PROTOBUF_CXX_FLAGS}") + string(REPLACE "-ffat-lto-objects" "" PROTOBUF_CXX_FLAGS "${PROTOBUF_CXX_FLAGS}") + string(APPEND PROTOBUF_CXX_FLAGS + " -Wno-inconsistent-missing-override -Wno-unneeded-internal-declaration") + set(PROTOBUF_CMAKE_ARGS + ${EP_COMMON_CMAKE_ARGS} + -DCMAKE_INSTALL_PREFIX=${PROTOBUF_PREFIX} + "-DCMAKE_CXX_FLAGS=${PROTOBUF_CXX_FLAGS}" + "-DCMAKE_C_FLAGS=${PROTOBUF_C_FLAGS}" + "-DZLIB_ROOT=${THIRDPARTY_ZLIB_ROOT}" + -Dprotobuf_BUILD_TESTS=OFF + -Dprotobuf_DEBUG_POSTFIX=) + set(PROTOBUF_CONFIGURE SOURCE_SUBDIR "cmake" CMAKE_ARGS ${PROTOBUF_CMAKE_ARGS}) + + # Every file an IMPORTED target below points at is listed as a byproduct so that + # generators such as Ninja know which step produces it. + externalproject_add(protobuf_ep + URL ${PROTOBUF_SOURCE_URL} + URL_HASH "SHA256=${PAIMON_PROTOBUF_BUILD_SHA256_CHECKSUM}" + ${PROTOBUF_CONFIGURE} ${THIRDPARTY_LOG_OPTIONS} + # BUILD_IN_SOURCE 1 + BUILD_BYPRODUCTS "${PROTOBUF_STATIC_LIB}" "${PROTOC_STATIC_LIB}" + "${PROTOBUF_COMPILER}" + DEPENDS zlib) + + file(MAKE_DIRECTORY "${PROTOBUF_INCLUDE_DIR}") + + include_directories(SYSTEM ${PROTOBUF_INCLUDE_DIR}) + add_library(libprotobuf STATIC IMPORTED) + set_target_properties(libprotobuf PROPERTIES IMPORTED_LOCATION ${PROTOBUF_STATIC_LIB}) + target_include_directories(libprotobuf INTERFACE ${PROTOBUF_INCLUDE_DIR}) + add_library(libprotoc STATIC IMPORTED) + set_target_properties(libprotoc PROPERTIES IMPORTED_LOCATION ${PROTOC_STATIC_LIB}) + target_include_directories(libprotoc INTERFACE ${PROTOBUF_INCLUDE_DIR}) + + add_executable(protoc IMPORTED) + set_target_properties(protoc PROPERTIES IMPORTED_LOCATION ${PROTOBUF_COMPILER}) + + # All three imported targets are produced by protobuf_ep, so each one must wait for it. + add_dependencies(libprotobuf protobuf_ep) + add_dependencies(libprotoc protobuf_ep) + add_dependencies(protoc
protobuf_ep) +endmacro() + +macro(build_avro) + message(STATUS "Building avro from source") + set(AVRO_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/avro_ep-install") + set(AVRO_INCLUDE_DIR "${AVRO_PREFIX}/include") + set(AVRO_STATIC_LIB_NAME avrocpp_s) + set(AVRO_STATIC_LIB + "${AVRO_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}${AVRO_STATIC_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) + set(AVRO_LIBRARIES ${AVRO_STATIC_LIB}) + + get_target_property(AVRO_SNAPPY_INCLUDE_DIR snappy INTERFACE_INCLUDE_DIRECTORIES) + get_filename_component(AVRO_SNAPPY_ROOT "${AVRO_SNAPPY_INCLUDE_DIR}" DIRECTORY) + + get_target_property(AVRO_ZSTD_INCLUDE_DIR zstd INTERFACE_INCLUDE_DIRECTORIES) + get_filename_component(AVRO_ZSTD_ROOT "${AVRO_ZSTD_INCLUDE_DIR}" DIRECTORY) + + get_target_property(AVRO_ZLIB_INCLUDE_DIR zlib INTERFACE_INCLUDE_DIRECTORIES) + get_filename_component(AVRO_ZLIB_ROOT "${AVRO_ZLIB_INCLUDE_DIR}" DIRECTORY) + + get_target_property(AVRO_FMT_INCLUDE_DIR fmt INTERFACE_INCLUDE_DIRECTORIES) + get_filename_component(AVRO_FMT_ROOT "${AVRO_FMT_INCLUDE_DIR}" DIRECTORY) + + set(AVRO_CMAKE_CXX_FLAGS "${EP_CXX_FLAGS} -Wno-error") + set(AVRO_CMAKE_C_FLAGS "${EP_C_FLAGS} -Wno-error") + + set(AVRO_CMAKE_ARGS + ${EP_COMMON_CMAKE_ARGS} + "-DCMAKE_INSTALL_PREFIX=${AVRO_PREFIX}" + "-DCMAKE_CXX_FLAGS=${AVRO_CMAKE_CXX_FLAGS}" + "-DCMAKE_C_FLAGS=${AVRO_CMAKE_C_FLAGS}" + "-DAVRO_BUILD_TESTS=OFF" + "-DAVRO_BUILD_EXECUTABLES=OFF" + "-DZLIB_ROOT=${AVRO_ZLIB_ROOT}" + "-Dfmt_ROOT=${AVRO_FMT_ROOT}" + "-Dzstd_ROOT=${AVRO_ZSTD_ROOT}" + "-DSnappy_ROOT=${AVRO_SNAPPY_ROOT}") + externalproject_add(avro_ep + URL ${AVRO_SOURCE_URL} + URL_HASH "SHA256=${PAIMON_AVRO_BUILD_SHA256_CHECKSUM}" + SOURCE_SUBDIR "lang/c++" + CMAKE_ARGS ${AVRO_CMAKE_ARGS} + BUILD_BYPRODUCTS "${AVRO_STATIC_LIB}" + DEPENDS fmt zlib zstd snappy) + + file(MAKE_DIRECTORY "${AVRO_INCLUDE_DIR}") + + include_directories(SYSTEM ${AVRO_INCLUDE_DIR}) + add_library(avro STATIC IMPORTED) + set_target_properties(avro PROPERTIES IMPORTED_LOCATION
${AVRO_STATIC_LIB}) + target_include_directories(avro INTERFACE ${AVRO_INCLUDE_DIR}) + target_link_libraries(avro INTERFACE zlib zstd snappy) + add_dependencies(avro avro_ep) +endmacro() + +# Builds ORC, patched with orc.diff, as the external project orc_ep and defines the +# IMPORTED target orc::orc. The snappy, lz4, zstd, zlib and libprotobuf targets must +# already exist: their include directories are used to derive the *_HOME hints. +macro(build_orc) + message(STATUS "Building orc from source") + + get_target_property(ORC_SNAPPY_INCLUDE_DIR snappy INTERFACE_INCLUDE_DIRECTORIES) + get_filename_component(ORC_SNAPPY_ROOT "${ORC_SNAPPY_INCLUDE_DIR}" DIRECTORY) + + get_target_property(ORC_LZ4_INCLUDE_DIR lz4 INTERFACE_INCLUDE_DIRECTORIES) + get_filename_component(ORC_LZ4_ROOT "${ORC_LZ4_INCLUDE_DIR}" DIRECTORY) + + get_target_property(ORC_ZSTD_INCLUDE_DIR zstd INTERFACE_INCLUDE_DIRECTORIES) + get_filename_component(ORC_ZSTD_ROOT "${ORC_ZSTD_INCLUDE_DIR}" DIRECTORY) + + get_target_property(ORC_ZLIB_INCLUDE_DIR zlib INTERFACE_INCLUDE_DIRECTORIES) + get_filename_component(ORC_ZLIB_ROOT "${ORC_ZLIB_INCLUDE_DIR}" DIRECTORY) + + get_target_property(ORC_PROTOBUF_INCLUDE_DIR libprotobuf + INTERFACE_INCLUDE_DIRECTORIES) + get_filename_component(ORC_PROTOBUF_ROOT "${ORC_PROTOBUF_INCLUDE_DIR}" DIRECTORY) + + get_property(PAIMON_RPATH GLOBAL PROPERTY PAIMON_RPATH) + message(STATUS "PAIMON_RPATH value: ${PAIMON_RPATH}") + set(ORC_RPATH ${PAIMON_RPATH}) + message(STATUS "ORC_RPATH value: ${ORC_RPATH}") + + # The input is quoted so that an empty EP_CXX_FLAGS still supplies the input + # argument string(REPLACE) requires. NOTE: this is a macro, so the stripped + # EP_CXX_FLAGS stays visible to whatever runs after it. + string(REPLACE "-Werror" "" EP_CXX_FLAGS "${EP_CXX_FLAGS}") + + set(ORC_CMAKE_CXX_FLAGS + "${EP_CXX_FLAGS} -fPIC -Wno-error ${CMAKE_CXX_FLAGS_${UPPERCASE_BUILD_TYPE}}") + # C sources get the per-configuration C flags rather than the C++ ones. + set(ORC_CMAKE_C_FLAGS + "${EP_C_FLAGS} -fPIC -Wno-error ${CMAKE_C_FLAGS_${UPPERCASE_BUILD_TYPE}}") + + set(ORC_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/orc_ep-prefix") + set(ORC_INCLUDE_DIR "${ORC_PREFIX}/include") + set(ORC_SOURCE_DIR "${ORC_PREFIX}/cpp") + set(ORC_BUILD_DIR "${CMAKE_BINARY_DIR}/build/orc") + + set(ORC_STATIC_LIB "${ORC_PREFIX}/lib/liborc.a") + + message("ORC_STATIC_LIB IS ${ORC_STATIC_LIB}") + message("ORC_CMAKE_CXX_FLAGS ${ORC_CMAKE_CXX_FLAGS}") + message("ORC_CMAKE_C_FLAGS ${ORC_CMAKE_C_FLAGS}") + + set(ORC_CMAKE_ARGS +
${EP_COMMON_CMAKE_ARGS} + "-DCMAKE_INSTALL_PREFIX=${ORC_PREFIX}" + "-DCMAKE_CXX_FLAGS=${ORC_CMAKE_CXX_FLAGS}" + "-DCMAKE_C_FLAGS=${ORC_CMAKE_C_FLAGS}" + "-DCMAKE_CXX_FLAGS_${UPPERCASE_BUILD_TYPE}=${ORC_CMAKE_CXX_FLAGS}" + "-DCMAKE_EXE_LINKER_FLAGS=-Wl,-rpath=${ORC_RPATH}" + "-DCMAKE_SHARED_LINKER_FLAGS=-Wl,-rpath=${ORC_RPATH}" + "-DCMAKE_MODULE_LINKER_FLAGS=-Wl,-rpath=${ORC_RPATH}" + "-DSNAPPY_HOME=${ORC_SNAPPY_ROOT}" + "-DLZ4_HOME=${ORC_LZ4_ROOT}" + "-DZSTD_HOME=${ORC_ZSTD_ROOT}" + "-DZLIB_HOME=${ORC_ZLIB_ROOT}" + "-DPROTOBUF_HOME=${ORC_PROTOBUF_ROOT}" + "-DProtobuf_ROOT=${ORC_PROTOBUF_ROOT}" + -DBUILD_JAVA=OFF + -DBUILD_CPP_TESTS=OFF + -DBUILD_TOOLS=OFF + -DBUILD_CPP_ENABLE_METRICS=ON) + + set(PATCH_FILE "${CMAKE_CURRENT_LIST_DIR}/orc.diff") + externalproject_add(orc_ep + URL ${ORC_SOURCE_URL} + URL_HASH "SHA256=${PAIMON_ORC_BUILD_SHA256_CHECKSUM}" + SOURCE_DIR ${ORC_SOURCE_DIR} + BINARY_DIR ${ORC_BUILD_DIR} + CMAKE_ARGS ${ORC_CMAKE_ARGS} + LOG_PATCH ON + PATCH_COMMAND ${CMAKE_COMMAND} -E chdir <SOURCE_DIR> bash -c + "[ -f .patched ] && echo '<SOURCE_DIR> patch already applied, ignore...' || patch -s -N -p1 -i '${PATCH_FILE}' && touch .patched" + UPDATE_DISCONNECTED 1 + BUILD_BYPRODUCTS ${ORC_STATIC_LIB} + DEPENDS zstd + snappy + lz4 + zlib + libprotobuf) + + # The include directory must exist before it is referenced by a target.
+ file(MAKE_DIRECTORY "${ORC_INCLUDE_DIR}") + + add_library(orc::orc STATIC IMPORTED) + set_target_properties(orc::orc + PROPERTIES IMPORTED_LOCATION "${ORC_STATIC_LIB}" + INTERFACE_INCLUDE_DIRECTORIES "${ORC_INCLUDE_DIR}") + target_link_libraries(orc::orc + INTERFACE zstd + snappy + lz4 + zlib + libprotobuf) + + add_dependencies(orc::orc orc_ep) +endmacro() + +macro(build_arrow) + message(STATUS "Building Arrow from source") + + get_target_property(ARROW_SNAPPY_INCLUDE_DIR snappy INTERFACE_INCLUDE_DIRECTORIES) + get_filename_component(ARROW_SNAPPY_ROOT "${ARROW_SNAPPY_INCLUDE_DIR}" DIRECTORY) + + get_target_property(ARROW_LZ4_INCLUDE_DIR lz4 INTERFACE_INCLUDE_DIRECTORIES) + get_filename_component(ARROW_LZ4_ROOT "${ARROW_LZ4_INCLUDE_DIR}" DIRECTORY) + + get_target_property(ARROW_ZSTD_INCLUDE_DIR zstd INTERFACE_INCLUDE_DIRECTORIES) + get_filename_component(ARROW_ZSTD_ROOT "${ARROW_ZSTD_INCLUDE_DIR}" DIRECTORY) + + get_target_property(ARROW_ZLIB_INCLUDE_DIR zlib INTERFACE_INCLUDE_DIRECTORIES) + get_filename_component(ARROW_ZLIB_ROOT "${ARROW_ZLIB_INCLUDE_DIR}" DIRECTORY) + + get_target_property(ARROW_RE2_INCLUDE_DIR re2::re2 INTERFACE_INCLUDE_DIRECTORIES) + get_filename_component(ARROW_RE2_ROOT "${ARROW_RE2_INCLUDE_DIR}" DIRECTORY) + + set(ARROW_CMAKE_CXX_FLAGS "${EP_CXX_FLAGS} -Wno-error") + set(ARROW_CMAKE_C_FLAGS "${EP_C_FLAGS} -Wno-error") + string(REPLACE "-Werror" "" ARROW_CMAKE_CXX_FLAGS ${ARROW_CMAKE_CXX_FLAGS}) + # Fix for thrift Mutex.h missing #include <cstdint> (GCC 15 strictness) + # Use -include to force include cstdint for all C++ files + string(APPEND ARROW_CMAKE_CXX_FLAGS " -include cstdint") + + set(ARROW_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/arrow_ep-install") + set(ARROW_HOME "${ARROW_PREFIX}") + set(ARROW_SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/arrow_ep-prefix/src/arrow_ep") + + set(_ARROW_LIBRARY_SUFFIX "${CMAKE_STATIC_LIBRARY_SUFFIX}") + set(ARROW_INCLUDE_DIR "${ARROW_PREFIX}/include") + + file(MAKE_DIRECTORY "${ARROW_INCLUDE_DIR}") + +
set(ARROW_BUILD_DIR "${CMAKE_BINARY_DIR}/arrow") + set(ARROW_STATIC_LIB + "${ARROW_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}arrow${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) + set(ARROW_DATASET_STATIC_LIB + "${ARROW_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}arrow_dataset${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) + set(ARROW_ACERO_STATIC_LIB + "${ARROW_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}arrow_acero${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) + set(ARROW_BUNDLED_DEP_STATIC_LIB + "${ARROW_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}arrow_bundled_dependencies${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) + set(PARQUET_STATIC_LIB + "${ARROW_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}parquet${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) + + set(ARROW_CMAKE_ARGS + ${EP_COMMON_CMAKE_ARGS} + "-DCMAKE_INSTALL_PREFIX=${ARROW_PREFIX}" + "-DCMAKE_CXX_FLAGS=${ARROW_CMAKE_CXX_FLAGS}" + "-DCMAKE_C_FLAGS=${ARROW_CMAKE_C_FLAGS}" + "-DCMAKE_CXX_FLAGS_${UPPERCASE_BUILD_TYPE}=${ARROW_CMAKE_CXX_FLAGS}" + -DARROW_DEPENDENCY_USE_SHARED=OFF + -DARROW_BUILD_SHARED=OFF + -DARROW_BUILD_STATIC=ON + -DARROW_BUILD_TESTS=OFF + -DARROW_BUILD_BENCHMARKS=OFF + -DARROW_BUILD_EXAMPLES=OFF + -DARROW_JEMALLOC=OFF + -DARROW_WITH_RE2=ON + -DARROW_WITH_UTF8PROC=OFF + -DARROW_ORC=OFF + -DARROW_SIMD_LEVEL=NONE + -DARROW_RUNTIME_SIMD_LEVEL=NONE + -DARROW_PARQUET=ON + -DARROW_IPC=ON + -DARROW_DATASET=ON + -DARROW_JSON=ON + -DARROW_COMPUTE=ON + -DARROW_WITH_SNAPPY=ON + -DARROW_WITH_ZLIB=ON + -DARROW_WITH_LZ4=ON + -DARROW_WITH_ZSTD=ON + -DARROW_WITH_BZ2=OFF + -DARROW_WITH_BROTLI=ON + -DZSTD_ROOT=${ARROW_ZSTD_ROOT} + -DZLIB_ROOT=${ARROW_ZLIB_ROOT} + -DSnappy_ROOT=${ARROW_SNAPPY_ROOT} + -DLZ4_ROOT=${ARROW_LZ4_ROOT} + -Dre2_ROOT=${ARROW_RE2_ROOT} + -DBUILD_WARNING_LEVEL=PRODUCTION) # ignore warnings under gcc8 + + set(ARROW_CONFIGURE SOURCE_SUBDIR "cpp" CMAKE_ARGS ${ARROW_CMAKE_ARGS}) + set(PATCH_FILE "${CMAKE_CURRENT_LIST_DIR}/arrow.diff") + externalproject_add(arrow_ep + URL ${ARROW_SOURCE_URL} + URL_HASH "SHA256=${PAIMON_ARROW_BUILD_SHA256_CHECKSUM}" + 
LOG_PATCH ON + PATCH_COMMAND ${CMAKE_COMMAND} -E chdir <SOURCE_DIR> bash -c + "[ -f .patched ] && echo '<SOURCE_DIR> patch already applied, ignore...' || patch -s -N -p1 -i '${PATCH_FILE}' && touch .patched" + GIT_SUBMODULES "" GIT_SUBMODULES_RECURSE FALSE ${ARROW_CONFIGURE} + UPDATE_DISCONNECTED 1 + BUILD_BYPRODUCTS "${ARROW_STATIC_LIB}" + "${ARROW_BUNDLED_DEP_STATIC_LIB}" + "${PARQUET_STATIC_LIB}" + "${ARROW_DATASET_STATIC_LIB}" + "${ARROW_ACERO_STATIC_LIB}" + DEPENDS zstd + snappy + lz4 + zlib + re2::re2) + + add_library(arrow STATIC IMPORTED) + set_target_properties(arrow + PROPERTIES IMPORTED_LOCATION "${ARROW_PREFIX}/lib/libarrow.a" + INTERFACE_INCLUDE_DIRECTORIES "${ARROW_INCLUDE_DIR}" + INTERFACE_LINK_DIRECTORIES + "${ARROW_BUILD_DIR}/${LOWERCASE_BUILD_TYPE}") + + add_library(arrow_dataset STATIC IMPORTED) + set_target_properties(arrow_dataset + PROPERTIES IMPORTED_LOCATION + "${ARROW_PREFIX}/lib/libarrow_dataset.a" + INTERFACE_INCLUDE_DIRECTORIES "${ARROW_INCLUDE_DIR}" + INTERFACE_LINK_DIRECTORIES + "${ARROW_BUILD_DIR}/${LOWERCASE_BUILD_TYPE}") + + add_library(arrow_acero STATIC IMPORTED) + set_target_properties(arrow_acero + PROPERTIES IMPORTED_LOCATION + "${ARROW_PREFIX}/lib/libarrow_acero.a" + INTERFACE_INCLUDE_DIRECTORIES "${ARROW_INCLUDE_DIR}" + INTERFACE_LINK_DIRECTORIES + "${ARROW_BUILD_DIR}/${LOWERCASE_BUILD_TYPE}") + + add_library(parquet STATIC IMPORTED) + set_target_properties(parquet + PROPERTIES IMPORTED_LOCATION "${ARROW_PREFIX}/lib/libparquet.a" + INTERFACE_INCLUDE_DIRECTORIES "${ARROW_INCLUDE_DIR}" + INTERFACE_LINK_DIRECTORIES + "${ARROW_BUILD_DIR}/${LOWERCASE_BUILD_TYPE}") + + add_library(arrow_bundled_dependencies STATIC IMPORTED) + set_target_properties(arrow_bundled_dependencies + PROPERTIES IMPORTED_LOCATION + "${ARROW_PREFIX}/lib/libarrow_bundled_dependencies.a" + INTERFACE_INCLUDE_DIRECTORIES "${ARROW_INCLUDE_DIR}" + INTERFACE_LINK_DIRECTORIES + "${ARROW_BUILD_DIR}/${LOWERCASE_BUILD_TYPE}") + + add_dependencies(arrow arrow_ep) + 
add_dependencies(parquet arrow_ep) + add_dependencies(arrow_bundled_dependencies arrow_ep) + add_dependencies(arrow_dataset arrow_ep) + add_dependencies(arrow_acero arrow_ep) + + target_link_libraries(arrow_acero INTERFACE arrow) + + target_link_libraries(arrow_dataset INTERFACE arrow_acero) + + target_link_libraries(arrow + INTERFACE zstd + snappy + lz4 + zlib + re2::re2 + arrow_bundled_dependencies) + + target_link_libraries(parquet + INTERFACE zstd + snappy + lz4 + zlib + arrow_bundled_dependencies + arrow_dataset) + +endmacro(build_arrow) + +# Builds googletest as the external project googletest_ep and defines the IMPORTED +# targets GTest::gtest, GTest::gtest_main and GTest::gmock. Also sets +# GTEST_LINK_TOOLCHAIN to those targets plus Threads::Threads. +macro(build_gtest) + message(STATUS "Building gtest from source") + + set(GTEST_CMAKE_CXX_FLAGS "${EP_CXX_FLAGS} -Wno-error") + string(REPLACE "-Werror" "" GTEST_CMAKE_CXX_FLAGS ${GTEST_CMAKE_CXX_FLAGS}) + + set(GTEST_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/googletest_ep-install") + set(GTEST_INCLUDE_DIR "${GTEST_PREFIX}/include") + + set(_GTEST_RUNTIME_DIR ${BUILD_OUTPUT_ROOT_DIRECTORY}) + + # Library and runtime same on non-Windows + set(_GTEST_LIBRARY_DIR "${_GTEST_RUNTIME_DIR}") + + # Debug builds of googletest install their archives with a "d" suffix. + if(LOWERCASE_BUILD_TYPE STREQUAL "debug") + set(GTEST_STATIC_LIB "${GTEST_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}gtestd.a") + set(GMOCK_STATIC_LIB "${GTEST_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}gmockd.a") + set(GTEST_MAIN_STATIC_LIB + "${GTEST_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}gtest_maind.a") + else() + set(GTEST_STATIC_LIB "${GTEST_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}gtest.a") + set(GMOCK_STATIC_LIB "${GTEST_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}gmock.a") + set(GTEST_MAIN_STATIC_LIB + "${GTEST_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}gtest_main.a") + endif() + # Per-configuration variables are keyed by the upper-case configuration name, so + # UPPERCASE_BUILD_TYPE is used for both of them. + set(GTEST_CMAKE_ARGS + ${EP_COMMON_CMAKE_ARGS} + "-DCMAKE_INSTALL_PREFIX=${GTEST_PREFIX}" + "-DCMAKE_CXX_FLAGS=${GTEST_CMAKE_CXX_FLAGS}" + "-DCMAKE_CXX_FLAGS_${UPPERCASE_BUILD_TYPE}=${GTEST_CMAKE_CXX_FLAGS}" + "-DCMAKE_ARCHIVE_OUTPUT_DIRECTORY=${_GTEST_RUNTIME_DIR}" + "-DCMAKE_RUNTIME_OUTPUT_DIRECTORY_${UPPERCASE_BUILD_TYPE}=${_GTEST_RUNTIME_DIR}") + +
externalproject_add(googletest_ep + URL ${GTEST_SOURCE_URL} + URL_HASH "SHA256=${PAIMON_GTEST_BUILD_SHA256_CHECKSUM}" + CMAKE_ARGS ${GTEST_CMAKE_ARGS} + BUILD_BYPRODUCTS "${GTEST_STATIC_LIB}" "${GTEST_MAIN_STATIC_LIB}" + "${GMOCK_STATIC_LIB}") + + # The include directory must exist before it is referenced by a target. + file(MAKE_DIRECTORY "${GTEST_INCLUDE_DIR}") + + add_library(GTest::gtest STATIC IMPORTED) + set_target_properties(GTest::gtest + PROPERTIES IMPORTED_LOCATION "${GTEST_STATIC_LIB}" + INTERFACE_INCLUDE_DIRECTORIES "${GTEST_INCLUDE_DIR}") + + add_library(GTest::gtest_main STATIC IMPORTED) + set_target_properties(GTest::gtest_main + PROPERTIES IMPORTED_LOCATION "${GTEST_MAIN_STATIC_LIB}" + INTERFACE_INCLUDE_DIRECTORIES "${GTEST_INCLUDE_DIR}") + + add_library(GTest::gmock STATIC IMPORTED) + set_target_properties(GTest::gmock + PROPERTIES IMPORTED_LOCATION "${GMOCK_STATIC_LIB}" + INTERFACE_INCLUDE_DIRECTORIES "${GTEST_INCLUDE_DIR}") + add_dependencies(GTest::gtest googletest_ep) + add_dependencies(GTest::gtest_main googletest_ep) + add_dependencies(GTest::gmock googletest_ep) + + find_package(Threads REQUIRED) + set(GTEST_LINK_TOOLCHAIN GTest::gtest_main GTest::gtest GTest::gmock Threads::Threads) +endmacro() + +macro(build_tbb) + message(STATUS "Building Tbb from source") + + set(TBB_CMAKE_CXX_FLAGS "${EP_CXX_FLAGS} -Wno-error") + set(TBB_CMAKE_C_FLAGS "${EP_C_FLAGS} -Wno-error") + string(REPLACE "-Werror" "" TBB_CMAKE_CXX_FLAGS ${TBB_CMAKE_CXX_FLAGS}) + + string(REPLACE "-Wdocumentation" "" TBB_CMAKE_CXX_FLAGS ${TBB_CMAKE_CXX_FLAGS}) + string(REPLACE "-Wdocumentation" "" TBB_CMAKE_C_FLAGS ${TBB_CMAKE_C_FLAGS}) + + set(TBB_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/tbb_ep-install") + + if(LOWERCASE_BUILD_TYPE STREQUAL "debug") + set(TBB_STATIC_LIB "${TBB_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}tbb_debug.a") + else() + set(TBB_STATIC_LIB "${TBB_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}tbb.a") + endif() + set(TBB_INCLUDE_DIR "${TBB_PREFIX}/include") + +
file(MAKE_DIRECTORY "${TBB_INCLUDE_DIR}") + + set(TBB_BUILD_DIR "${CMAKE_BINARY_DIR}/tbb") + + set(TBB_CMAKE_ARGS + ${EP_COMMON_CMAKE_ARGS} + "-DCMAKE_INSTALL_PREFIX=${TBB_PREFIX}" + "-DCMAKE_CXX_FLAGS=${TBB_CMAKE_CXX_FLAGS}" + "-DCMAKE_C_FLAGS=${TBB_CMAKE_C_FLAGS}" + "-DCMAKE_CXX_FLAGS_${UPPERCASE_BUILD_TYPE}=${TBB_CMAKE_CXX_FLAGS}" + -DTBB_TEST=OFF) + + externalproject_add(tbb_ep + URL ${TBB_SOURCE_URL} + URL_HASH "SHA256=${PAIMON_TBB_BUILD_SHA256_CHECKSUM}" + CMAKE_ARGS ${TBB_CMAKE_ARGS} + BUILD_BYPRODUCTS "${TBB_STATIC_LIB}") + + add_library(tbb STATIC IMPORTED) + set_target_properties(tbb + PROPERTIES IMPORTED_LOCATION "${TBB_STATIC_LIB}" + INTERFACE_INCLUDE_DIRECTORIES "${TBB_INCLUDE_DIR}" + INTERFACE_LINK_DIRECTORIES + "${TBB_BUILD_DIR}/${LOWERCASE_BUILD_TYPE}") + add_dependencies(tbb tbb_ep) + +endmacro(build_tbb) + +# Builds glog as the external project glog_ep, with gflags and gtest support turned +# off, and defines the IMPORTED static target glog. libunwind is added to the +# target's link interface when find_library locates it. +macro(build_glog) + message(STATUS "Building glog from source") + set(GLOG_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/glog_ep-install") + set(GLOG_INCLUDE_DIR "${GLOG_PREFIX}/include") + # Debug builds of glog install the archive with a "d" suffix. + if(${UPPERCASE_BUILD_TYPE} STREQUAL "DEBUG") + set(GLOG_LIB_SUFFIX "d") + else() + set(GLOG_LIB_SUFFIX "") + endif() + set(GLOG_STATIC_LIB "${GLOG_PREFIX}/lib/libglog${GLOG_LIB_SUFFIX}.a") + set(GLOG_CMAKE_CXX_FLAGS " -Wno-error ${EP_CXX_FLAGS}") + set(GLOG_CMAKE_C_FLAGS " -Wno-error ${EP_C_FLAGS}") + if(CMAKE_THREAD_LIBS_INIT) + string(APPEND GLOG_CMAKE_CXX_FLAGS " ${CMAKE_THREAD_LIBS_INIT}") + string(APPEND GLOG_CMAKE_C_FLAGS " ${CMAKE_THREAD_LIBS_INIT}") + endif() + + set(GLOG_CMAKE_ARGS + ${EP_COMMON_CMAKE_ARGS} + -DCMAKE_INSTALL_PREFIX=${GLOG_PREFIX} + -DWITH_GFLAGS=OFF + -DWITH_GTEST=OFF + -DCMAKE_CXX_FLAGS=${GLOG_CMAKE_CXX_FLAGS} + -DCMAKE_C_FLAGS=${GLOG_CMAKE_C_FLAGS}) + + externalproject_add(glog_ep + URL ${GLOG_SOURCE_URL} + URL_HASH "SHA256=${PAIMON_GLOG_BUILD_SHA256_CHECKSUM}" + CMAKE_ARGS ${GLOG_CMAKE_ARGS} + BUILD_BYPRODUCTS "${GLOG_STATIC_LIB}") + + file(MAKE_DIRECTORY "${GLOG_INCLUDE_DIR}") + add_library(glog STATIC IMPORTED) +
# The link directory is the install lib directory that also holds GLOG_STATIC_LIB; + # no GLOG_BUILD_DIR is defined in this macro. + set_target_properties(glog + PROPERTIES IMPORTED_LOCATION "${GLOG_STATIC_LIB}" + INTERFACE_INCLUDE_DIRECTORIES "${GLOG_INCLUDE_DIR}" + INTERFACE_LINK_DIRECTORIES "${GLOG_PREFIX}/lib" + INTERFACE_COMPILE_DEFINITIONS "GLOG_USE_GLOG_EXPORT") + + add_dependencies(glog glog_ep) + + find_library(LIBUNWIND_LIBRARY NAMES unwind) + if(LIBUNWIND_LIBRARY) + target_link_libraries(glog INTERFACE ${LIBUNWIND_LIBRARY}) + endif() +endmacro() + +resolve_dependency(fmt) +resolve_dependency(RapidJSON) +paimon_apply_dependency_source_defaults() +resolve_dependency(RE2) +resolve_dependency(Snappy) +resolve_dependency(zstd) +resolve_dependency(ZLIB) +resolve_dependency(LZ4) +resolve_dependency(Arrow) +paimon_warn_if_mixed_arrow_dependencies() +resolve_dependency(TBB) +resolve_dependency(glog) + +if(PAIMON_ENABLE_AVRO) + resolve_dependency(Avro) +endif() +if(PAIMON_ENABLE_ORC) + resolve_dependency(Protobuf) + resolve_dependency(ORC) +endif() +if(PAIMON_ENABLE_JINDO) + build_jindosdk_c() + build_jindosdk_nextarch() +endif() +if(PAIMON_ENABLE_LUCENE) + build_boost() + build_lucene() + build_jieba() +endif() diff --git a/cmake_modules/arrow.diff b/cmake_modules/arrow.diff new file mode 100644 index 0000000..e539d1f --- /dev/null +++ b/cmake_modules/arrow.diff @@ -0,0 +1,213 @@ +diff --git a/cpp/src/parquet/arrow/schema.cc b/cpp/src/parquet/arrow/schema.cc +index ec3890a41f..943f69bb6c 100644 +--- a/cpp/src/parquet/arrow/schema.cc ++++ b/cpp/src/parquet/arrow/schema.cc +@@ -178,7 +178,7 @@ static Status GetTimestampMetadata(const ::arrow::TimestampType& type, + + // The user is explicitly asking for Impala int96 encoding, there is no + // logical type.
+- if (arrow_properties.support_deprecated_int96_timestamps()) { ++ if (arrow_properties.support_deprecated_int96_timestamps() && target_unit == ::arrow::TimeUnit::NANO) { + *physical_type = ParquetType::INT96; + return Status::OK(); + } + +diff --git a/cpp/src/parquet/arrow/reader.cc b/cpp/src/parquet/arrow/reader.cc +index 285e2a5973..aa6f92f077 100644 +--- a/cpp/src/parquet/arrow/reader.cc ++++ b/cpp/src/parquet/arrow/reader.cc +@@ -1013,25 +1013,32 @@ Status FileReaderImpl::GetRecordBatchReader(const std::vector<int>& row_groups, + return Status::OK(); + } + +- int64_t num_rows = 0; ++ std::vector<int64_t> num_rows; + for (int row_group : row_groups) { +- num_rows += parquet_reader()->metadata()->RowGroup(row_group)->num_rows(); ++ num_rows.push_back(parquet_reader()->metadata()->RowGroup(row_group)->num_rows()); + } + + using ::arrow::RecordBatchIterator; ++ int row_group_idx = 0; + + // NB: This lambda will be invoked outside the scope of this call to + // `GetRecordBatchReader()`, so it must capture `readers` and `batch_schema` by value. + // `this` is a non-owning pointer so we are relying on the parent FileReader outliving + // this RecordBatchReader. 
+ ::arrow::Iterator<RecordBatchIterator> batches = ::arrow::MakeFunctionIterator( +- [readers, batch_schema, num_rows, ++ [readers, batch_schema, num_rows, row_group_idx, + this]() mutable -> ::arrow::Result<RecordBatchIterator> { + ::arrow::ChunkedArrayVector columns(readers.size()); + +- // don't reserve more rows than necessary +- int64_t batch_size = std::min(properties().batch_size(), num_rows); +- num_rows -= batch_size; ++ int64_t batch_size = 0; ++ if (!num_rows.empty()) { ++ // don't reserve more rows than necessary ++ batch_size = std::min(properties().batch_size(), num_rows[row_group_idx]); ++ num_rows[row_group_idx] -= batch_size; ++ if (num_rows[row_group_idx] == 0 && (num_rows.size() - 1) != row_group_idx) { ++ row_group_idx++; ++ } ++ } + + RETURN_NOT_OK(::arrow::internal::OptionalParallelFor( + reader_properties_.use_threads(), static_cast<int>(readers.size()), +diff --git a/cpp/src/parquet/arrow/writer.cc b/cpp/src/parquet/arrow/writer.cc +index 4fd7ef1b47..87326a54f1 100644 +--- a/cpp/src/parquet/arrow/writer.cc ++++ b/cpp/src/parquet/arrow/writer.cc +@@ -314,6 +314,14 @@ class FileWriterImpl : public FileWriter { + return Status::OK(); + } + ++ int64_t GetBufferedSize() override { ++ if (row_group_writer_ == nullptr) { ++ return 0; ++ } ++ return row_group_writer_->total_compressed_bytes() + ++ row_group_writer_->total_compressed_bytes_written(); ++ } ++ + Status Close() override { + if (!closed_) { + // Make idempotent +@@ -418,10 +426,13 @@ class FileWriterImpl : public FileWriter { + + // Max number of rows allowed in a row group. + const int64_t max_row_group_length = this->properties().max_row_group_length(); ++ const int64_t max_row_group_size = this->properties().max_row_group_size(); + + // Initialize a new buffered row group writer if necessary. 
+ if (row_group_writer_ == nullptr || !row_group_writer_->buffered() || +- row_group_writer_->num_rows() >= max_row_group_length) { ++ row_group_writer_->num_rows() >= max_row_group_length || ++ (row_group_writer_->total_compressed_bytes_written() + ++ row_group_writer_->total_compressed_bytes() >= max_row_group_size)) { + RETURN_NOT_OK(NewBufferedRowGroup()); + } + +diff --git a/cpp/src/parquet/arrow/writer.h b/cpp/src/parquet/arrow/writer.h +index 4a1a033a7b..0f13d05e44 100644 +--- a/cpp/src/parquet/arrow/writer.h ++++ b/cpp/src/parquet/arrow/writer.h +@@ -138,6 +138,9 @@ class PARQUET_EXPORT FileWriter { + /// option in this case. + virtual ::arrow::Status WriteRecordBatch(const ::arrow::RecordBatch& batch) = 0; + ++ /// \brief Return the buffered size in bytes. ++ virtual int64_t GetBufferedSize() = 0; ++ + /// \brief Write the footer and close the file. + virtual ::arrow::Status Close() = 0; + virtual ~FileWriter(); +diff --git a/cpp/src/parquet/properties.h b/cpp/src/parquet/properties.h +index 4d3acb491e..3906ff3c59 100644 +--- a/cpp/src/parquet/properties.h ++++ b/cpp/src/parquet/properties.h +@@ -139,6 +139,7 @@ static constexpr bool DEFAULT_IS_DICTIONARY_ENABLED = true; + static constexpr int64_t DEFAULT_DICTIONARY_PAGE_SIZE_LIMIT = kDefaultDataPageSize; + static constexpr int64_t DEFAULT_WRITE_BATCH_SIZE = 1024; + static constexpr int64_t DEFAULT_MAX_ROW_GROUP_LENGTH = 1024 * 1024; ++static constexpr int64_t DEFAULT_MAX_ROW_GROUP_SIZE = 128 * 1024 * 1024; + static constexpr bool DEFAULT_ARE_STATISTICS_ENABLED = true; + static constexpr int64_t DEFAULT_MAX_STATISTICS_SIZE = 4096; + static constexpr Encoding::type DEFAULT_ENCODING = Encoding::UNKNOWN; +@@ -232,6 +233,7 @@ class PARQUET_EXPORT WriterProperties { + dictionary_pagesize_limit_(DEFAULT_DICTIONARY_PAGE_SIZE_LIMIT), + write_batch_size_(DEFAULT_WRITE_BATCH_SIZE), + max_row_group_length_(DEFAULT_MAX_ROW_GROUP_LENGTH), ++ max_row_group_size_(DEFAULT_MAX_ROW_GROUP_SIZE), + 
pagesize_(kDefaultDataPageSize), + version_(ParquetVersion::PARQUET_2_6), + data_page_version_(ParquetDataPageVersion::V1), +@@ -244,6 +246,7 @@ class PARQUET_EXPORT WriterProperties { + dictionary_pagesize_limit_(properties.dictionary_pagesize_limit()), + write_batch_size_(properties.write_batch_size()), + max_row_group_length_(properties.max_row_group_length()), ++ max_row_group_size_(properties.max_row_group_size()), + pagesize_(properties.data_pagesize()), + version_(properties.version()), + data_page_version_(properties.data_page_version()), +@@ -321,6 +324,13 @@ class PARQUET_EXPORT WriterProperties { + return this; + } + ++ /// Specify the max bytes size to put in a single row group. ++ /// Default 128 M. ++ Builder* max_row_group_size(int64_t max_row_group_size) { ++ max_row_group_size_ = max_row_group_size; ++ return this; ++ } ++ + /// Specify the data page size. + /// Default 1MB. + Builder* data_pagesize(int64_t pg_size) { +@@ -664,7 +674,7 @@ class PARQUET_EXPORT WriterProperties { + + return std::shared_ptr<WriterProperties>(new WriterProperties( + pool_, dictionary_pagesize_limit_, write_batch_size_, max_row_group_length_, +- pagesize_, version_, created_by_, page_checksum_enabled_, ++ max_row_group_size_, pagesize_, version_, created_by_, page_checksum_enabled_, + std::move(file_encryption_properties_), default_column_properties_, + column_properties, data_page_version_, store_decimal_as_integer_, + std::move(sorting_columns_))); +@@ -675,6 +685,7 @@ class PARQUET_EXPORT WriterProperties { + int64_t dictionary_pagesize_limit_; + int64_t write_batch_size_; + int64_t max_row_group_length_; ++ int64_t max_row_group_size_; + int64_t pagesize_; + ParquetVersion::type version_; + ParquetDataPageVersion data_page_version_; +@@ -705,6 +716,8 @@ class PARQUET_EXPORT WriterProperties { + + inline int64_t max_row_group_length() const { return max_row_group_length_; } + ++ inline int64_t max_row_group_size() const { return max_row_group_size_; } ++ + inline 
int64_t data_pagesize() const { return pagesize_; } + + inline ParquetDataPageVersion data_page_version() const { +@@ -810,7 +823,7 @@ class PARQUET_EXPORT WriterProperties { + private: + explicit WriterProperties( + MemoryPool* pool, int64_t dictionary_pagesize_limit, int64_t write_batch_size, +- int64_t max_row_group_length, int64_t pagesize, ParquetVersion::type version, ++ int64_t max_row_group_length, int64_t max_row_group_size, int64_t pagesize, ParquetVersion::type version, + const std::string& created_by, bool page_write_checksum_enabled, + std::shared_ptr<FileEncryptionProperties> file_encryption_properties, + const ColumnProperties& default_column_properties, +@@ -821,6 +834,7 @@ class PARQUET_EXPORT WriterProperties { + dictionary_pagesize_limit_(dictionary_pagesize_limit), + write_batch_size_(write_batch_size), + max_row_group_length_(max_row_group_length), ++ max_row_group_size_(max_row_group_size), + pagesize_(pagesize), + parquet_data_page_version_(data_page_version), + parquet_version_(version), +@@ -836,6 +850,7 @@ class PARQUET_EXPORT WriterProperties { + int64_t dictionary_pagesize_limit_; + int64_t write_batch_size_; + int64_t max_row_group_length_; ++ int64_t max_row_group_size_; + int64_t pagesize_; + ParquetDataPageVersion parquet_data_page_version_; + ParquetVersion::type parquet_version_; +diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake +--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake ++++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake +@@ -981,6 +981,11 @@ if(CMAKE_TOOLCHAIN_FILE) + list(APPEND EP_COMMON_CMAKE_ARGS -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}) + endif() + ++# Compatibility with bundled dependencies that require old CMake versions. 
++if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.30") ++ list(APPEND EP_COMMON_CMAKE_ARGS -DCMAKE_POLICY_VERSION_MINIMUM=3.5) ++endif() ++ + # and crosscompiling emulator (for try_run() ) + if(CMAKE_CROSSCOMPILING_EMULATOR) + string(REPLACE ";" ${EP_LIST_SEPARATOR} EP_CMAKE_CROSSCOMPILING_EMULATOR diff --git a/cmake_modules/jieba.diff b/cmake_modules/jieba.diff new file mode 100644 index 0000000..d74c3f6 --- /dev/null +++ b/cmake_modules/jieba.diff @@ -0,0 +1,16 @@ +diff --git a/include/cppjieba/KeywordExtractor.hpp b/include/cppjieba/KeywordExtractor.hpp +index 24b2c40..c7c6a94 100644 +--- a/include/cppjieba/KeywordExtractor.hpp ++++ b/include/cppjieba/KeywordExtractor.hpp +@@ -89,6 +89,11 @@ class KeywordExtractor { + std::partial_sort(keywords.begin(), keywords.begin() + topN, keywords.end(), Compare); + keywords.resize(topN); + } ++ ++ const std::unordered_set<std::string>& GetStopWords() const { ++ return stopWords_; ++ } ++ + private: + void LoadIdfDict(const std::string& idfPath) { + std::ifstream ifs(idfPath.c_str()); diff --git a/cmake_modules/orc.diff b/cmake_modules/orc.diff new file mode 100644 index 0000000..e4ca4e2 --- /dev/null +++ b/cmake_modules/orc.diff @@ -0,0 +1,437 @@ +diff --git a/c++/include/orc/MemoryPool.hh b/c++/include/orc/MemoryPool.hh +index a914e5f26..efe1d4933 100644 +--- a/c++/include/orc/MemoryPool.hh ++++ b/c++/include/orc/MemoryPool.hh +@@ -42,13 +42,15 @@ namespace orc { + uint64_t currentSize_; + // maximal capacity (actual allocated memory) + uint64_t currentCapacity_; ++ // flag to indicate whether it needs to manage buffer or not ++ bool ownBuffer_; + + // not implemented + DataBuffer(DataBuffer& buffer); + DataBuffer& operator=(DataBuffer& buffer); + + public: +- DataBuffer(MemoryPool& pool, uint64_t size = 0); ++ DataBuffer(MemoryPool& pool, uint64_t size = 0, bool ownBuf = true); + + DataBuffer(DataBuffer<T>&& buffer) noexcept; + +@@ -81,6 +83,10 @@ namespace orc { + void reserve(uint64_t size); + void resize(uint64_t size); + 
void zeroOut(); ++ ++ // set external buffer ++ void setData(T* buf, size_t bufSize); ++ + }; + + // Specializations for char +diff --git a/c++/include/orc/Reader.hh b/c++/include/orc/Reader.hh +index b015b6491..585e50ec5 100644 +--- a/c++/include/orc/Reader.hh ++++ b/c++/include/orc/Reader.hh +@@ -659,6 +659,9 @@ namespace orc { + virtual void preBuffer(const std::vector<uint32_t>& stripes, + const std::list<uint64_t>& includeTypes) = 0; + ++ virtual std::vector<std::pair<uint64_t, uint64_t>> preBufferRange( ++ const std::vector<uint32_t>& stripes, const std::list<uint64_t>& includeTypes) = 0; ++ + /** + * Release cached entries whose right boundary is less than or equal to the given boundary. + * @param boundary the boundary value to release cache entries +diff --git a/c++/src/ColumnReader.cc b/c++/src/ColumnReader.cc +index af434c37c..08393259c 100644 +--- a/c++/src/ColumnReader.cc ++++ b/c++/src/ColumnReader.cc +@@ -332,7 +332,13 @@ namespace orc { + nanoBuffer[i] *= 10; + } + } ++ ++ // ORC-306: compensate -1s for JDK bug in java.sql.Timestamp + int64_t writerTime = secsBuffer[i] + epochOffset_; ++ if (writerTime < 0 && nanoBuffer[i] > 999999) { ++ writerTime -= 1; ++ } ++ + if (!sameTimezone_) { + // adjust timestamp value to same wall clock time if writer and reader + // time zones have different rules, which is required for Apache Orc. 
+@@ -347,9 +353,6 @@ namespace orc { + } + } + secsBuffer[i] = writerTime; +- if (secsBuffer[i] < 0 && nanoBuffer[i] > 999999) { +- secsBuffer[i] -= 1; +- } + } + } + } +diff --git a/c++/src/MemoryPool.cc b/c++/src/MemoryPool.cc +index ed7fee737..a8ee8a67c 100644 +--- a/c++/src/MemoryPool.cc ++++ b/c++/src/MemoryPool.cc +@@ -52,8 +52,8 @@ namespace orc { + } + + template <class T> +- DataBuffer<T>::DataBuffer(MemoryPool& pool, uint64_t newSize) +- : memoryPool_(pool), buf_(nullptr), currentSize_(0), currentCapacity_(0) { ++ DataBuffer<T>::DataBuffer(MemoryPool& pool, uint64_t newSize, bool ownBuf) ++ : memoryPool_(pool), buf_(nullptr), currentSize_(0), currentCapacity_(0), ownBuffer_(ownBuf) { + reserve(newSize); + currentSize_ = newSize; + } +@@ -63,24 +63,35 @@ namespace orc { + : memoryPool_(buffer.memoryPool_), + buf_(buffer.buf_), + currentSize_(buffer.currentSize_), +- currentCapacity_(buffer.currentCapacity_) { +- buffer.buf_ = nullptr; +- buffer.currentSize_ = 0; +- buffer.currentCapacity_ = 0; ++ currentCapacity_(buffer.currentCapacity_), ++ ownBuffer_(buffer.ownBuffer_) { ++ if (buffer.ownBuffer_) { ++ buffer.buf_ = nullptr; ++ buffer.currentSize_ = 0; ++ buffer.currentCapacity_ = 0; ++ } + } + + template <class T> + DataBuffer<T>::~DataBuffer() { ++ if (!ownBuffer_) { ++ return; ++ } + for (uint64_t i = currentSize_; i > 0; --i) { + (buf_ + i - 1)->~T(); + } + if (buf_) { ++ static_assert(std::is_trivially_copyable<T>::value, ++ "Only trivially copyable type is supported for DataBuffer Reserve"); + memoryPool_.free(reinterpret_cast<char*>(buf_)); + } + } + + template <class T> + void DataBuffer<T>::resize(uint64_t newSize) { ++ if (!ownBuffer_) { ++ return; ++ } + reserve(newSize); + if (currentSize_ > newSize) { + for (uint64_t i = currentSize_; i > newSize; --i) { +@@ -96,6 +107,9 @@ namespace orc { + + template <class T> + void DataBuffer<T>::reserve(uint64_t newCapacity) { ++ if (!ownBuffer_) { ++ return; ++ } + if (newCapacity > currentCapacity_ || 
!buf_) { + if (buf_) { + T* buf_old = buf_; +@@ -114,6 +128,18 @@ namespace orc { + memset(buf_, 0, sizeof(T) * currentCapacity_); + } + ++ template <class T> ++ void DataBuffer<T>::setData(T* buffer, size_t bufSize) { ++ if (ownBuffer_ && buf_) { ++ static_assert(std::is_trivially_copyable<T>::value, ++ "Only trivially copyable type is supported for DataBuffer Reserve"); ++ memoryPool_.free(reinterpret_cast<char*>(buf_)); ++ } ++ ownBuffer_ = false; ++ buf_ = buffer; ++ currentSize_ = currentCapacity_ = bufSize / sizeof(T); ++ } ++ + // Specializations for Int128 + template <> + void DataBuffer<Int128>::zeroOut() { +@@ -126,13 +152,16 @@ namespace orc { + + template <> + DataBuffer<char>::~DataBuffer() { +- if (buf_) { ++ if (ownBuffer_ && buf_) { + memoryPool_.free(reinterpret_cast<char*>(buf_)); + } + } + + template <> + void DataBuffer<char>::resize(uint64_t newSize) { ++ if (!ownBuffer_) { ++ return; ++ } + reserve(newSize); + if (newSize > currentSize_) { + memset(buf_ + currentSize_, 0, newSize - currentSize_); +@@ -144,13 +173,16 @@ namespace orc { + + template <> + DataBuffer<char*>::~DataBuffer() { +- if (buf_) { ++ if (ownBuffer_ && buf_) { + memoryPool_.free(reinterpret_cast<char*>(buf_)); + } + } + + template <> + void DataBuffer<char*>::resize(uint64_t newSize) { ++ if (!ownBuffer_) { ++ return; ++ } + reserve(newSize); + if (newSize > currentSize_) { + memset(buf_ + currentSize_, 0, (newSize - currentSize_) * sizeof(char*)); +@@ -162,13 +194,16 @@ namespace orc { + + template <> + DataBuffer<double>::~DataBuffer() { +- if (buf_) { ++ if (ownBuffer_ && buf_) { + memoryPool_.free(reinterpret_cast<char*>(buf_)); + } + } + + template <> + void DataBuffer<double>::resize(uint64_t newSize) { ++ if (!ownBuffer_) { ++ return; ++ } + reserve(newSize); + if (newSize > currentSize_) { + memset(buf_ + currentSize_, 0, (newSize - currentSize_) * sizeof(double)); +@@ -180,13 +215,16 @@ namespace orc { + + template <> + DataBuffer<float>::~DataBuffer() { +- if 
(buf_) { ++ if (ownBuffer_ && buf_) { + memoryPool_.free(reinterpret_cast<char*>(buf_)); + } + } + + template <> + void DataBuffer<float>::resize(uint64_t newSize) { ++ if (!ownBuffer_) { ++ return; ++ } + reserve(newSize); + if (newSize > currentSize_) { + memset(buf_ + currentSize_, 0, (newSize - currentSize_) * sizeof(float)); +@@ -198,13 +236,17 @@ namespace orc { + + template <> + DataBuffer<int64_t>::~DataBuffer() { +- if (buf_) { ++ if (ownBuffer_ && buf_) { + memoryPool_.free(reinterpret_cast<char*>(buf_)); + } + } + + template <> + void DataBuffer<int64_t>::resize(uint64_t newSize) { ++ if (!ownBuffer_) { ++ return; ++ } ++ + reserve(newSize); + if (newSize > currentSize_) { + memset(buf_ + currentSize_, 0, (newSize - currentSize_) * sizeof(int64_t)); +@@ -216,13 +258,17 @@ namespace orc { + + template <> + DataBuffer<int32_t>::~DataBuffer() { +- if (buf_) { ++ if (ownBuffer_ && buf_) { + memoryPool_.free(reinterpret_cast<char*>(buf_)); + } + } + + template <> + void DataBuffer<int32_t>::resize(uint64_t newSize) { ++ if (!ownBuffer_) { ++ return; ++ } ++ + reserve(newSize); + if (newSize > currentSize_) { + memset(buf_ + currentSize_, 0, (newSize - currentSize_) * sizeof(int32_t)); +@@ -234,13 +280,17 @@ namespace orc { + + template <> + DataBuffer<int16_t>::~DataBuffer() { +- if (buf_) { ++ if (ownBuffer_ && buf_) { + memoryPool_.free(reinterpret_cast<char*>(buf_)); + } + } + + template <> + void DataBuffer<int16_t>::resize(uint64_t newSize) { ++ if (!ownBuffer_) { ++ return; ++ } ++ + reserve(newSize); + if (newSize > currentSize_) { + memset(buf_ + currentSize_, 0, (newSize - currentSize_) * sizeof(int16_t)); +@@ -252,13 +302,17 @@ namespace orc { + + template <> + DataBuffer<int8_t>::~DataBuffer() { +- if (buf_) { ++ if (ownBuffer_ && buf_) { + memoryPool_.free(reinterpret_cast<char*>(buf_)); + } + } + + template <> + void DataBuffer<int8_t>::resize(uint64_t newSize) { ++ if (!ownBuffer_) { ++ return; ++ } ++ + reserve(newSize); + if (newSize > 
currentSize_) { + memset(buf_ + currentSize_, 0, (newSize - currentSize_) * sizeof(int8_t)); +@@ -270,13 +324,17 @@ namespace orc { + + template <> + DataBuffer<uint64_t>::~DataBuffer() { +- if (buf_) { ++ if (ownBuffer_ && buf_) { + memoryPool_.free(reinterpret_cast<char*>(buf_)); + } + } + + template <> + void DataBuffer<uint64_t>::resize(uint64_t newSize) { ++ if (!ownBuffer_) { ++ return; ++ } ++ + reserve(newSize); + if (newSize > currentSize_) { + memset(buf_ + currentSize_, 0, (newSize - currentSize_) * sizeof(uint64_t)); +@@ -288,13 +346,17 @@ namespace orc { + + template <> + DataBuffer<unsigned char>::~DataBuffer() { +- if (buf_) { ++ if (ownBuffer_ && buf_) { + memoryPool_.free(reinterpret_cast<char*>(buf_)); + } + } + + template <> + void DataBuffer<unsigned char>::resize(uint64_t newSize) { ++ if (!ownBuffer_) { ++ return; ++ } ++ + reserve(newSize); + if (newSize > currentSize_) { + memset(buf_ + currentSize_, 0, newSize - currentSize_); +diff --git a/c++/src/Reader.cc b/c++/src/Reader.cc +index c93c62f6c..2a821b622 100644 +--- a/c++/src/Reader.cc ++++ b/c++/src/Reader.cc +@@ -1531,8 +1531,8 @@ namespace orc { + } + } + +- void ReaderImpl::preBuffer(const std::vector<uint32_t>& stripes, +- const std::list<uint64_t>& includeTypes) { ++ std::vector<std::pair<uint64_t, uint64_t>> ReaderImpl::preBufferRange( ++ const std::vector<uint32_t>& stripes, const std::list<uint64_t>& includeTypes) { + std::vector<uint32_t> newStripes; + for (auto stripe : stripes) { + if (stripe < static_cast<uint32_t>(footer_->stripes_size())) newStripes.push_back(stripe); +@@ -1544,7 +1544,7 @@ namespace orc { + } + + if (newStripes.empty() || newIncludeTypes.empty()) { +- return; ++ return {}; + } + + orc::RowReaderOptions rowReaderOptions; +@@ -1553,7 +1553,7 @@ namespace orc { + std::vector<bool> selectedColumns; + columnSelector.updateSelected(selectedColumns, rowReaderOptions); + +- std::vector<ReadRange> ranges; ++ std::vector<std::pair<uint64_t, uint64_t>> ranges; + 
ranges.reserve(newIncludeTypes.size()); + for (auto stripe : newStripes) { + // get stripe information +@@ -1598,17 +1598,23 @@ namespace orc { + + offset += stream.length(); + } ++ } ++ return ranges; ++ } + +- { +- std::lock_guard<std::mutex> lock(contents_->readCacheMutex); +- +- if (!contents_->readCache) { +- contents_->readCache = std::make_shared<ReadRangeCache>( +- getStream(), options_.getCacheOptions(), contents_->pool, contents_->readerMetrics); +- } +- contents_->readCache->cache(std::move(ranges)); +- } ++ void ReaderImpl::preBuffer(const std::vector<uint32_t>& stripes, ++ const std::list<uint64_t>& includeTypes) { ++ auto ranges = preBufferRange(stripes, includeTypes); ++ std::vector<ReadRange> read_ranges; ++ for (const auto& range : ranges) { ++ read_ranges.emplace_back(range.first, range.second); ++ } ++ std::lock_guard<std::mutex> lock(contents_->readCacheMutex); ++ if (!contents_->readCache) { ++ contents_->readCache = std::make_shared<ReadRangeCache>( ++ getStream(), options_.getCacheOptions(), contents_->pool, contents_->readerMetrics); + } ++ contents_->readCache->cache(std::move(read_ranges)); + } + + RowReader::~RowReader() { +diff --git a/c++/src/Reader.hh b/c++/src/Reader.hh +index 39ca73967..13da45a49 100644 +--- a/c++/src/Reader.hh ++++ b/c++/src/Reader.hh +@@ -387,6 +387,9 @@ namespace orc { + std::map<uint32_t, BloomFilterIndex> getBloomFilters( + uint32_t stripeIndex, const std::set<uint32_t>& included) const override; + ++ std::vector<std::pair<uint64_t, uint64_t>> preBufferRange( ++ const std::vector<uint32_t>& stripes, const std::list<uint64_t>& includeTypes) override; ++ + void preBuffer(const std::vector<uint32_t>& stripes, + const std::list<uint64_t>& includeTypes) override; + void releaseBuffer(uint64_t boundary) override; +diff --git a/cmake_modules/ThirdpartyToolchain.cmake b/cmake_modules/ThirdpartyToolchain.cmake +index 9b2c829c7..434841224 100644 +--- a/cmake_modules/ThirdpartyToolchain.cmake ++++ 
b/cmake_modules/ThirdpartyToolchain.cmake +@@ -19,6 +19,8 @@ set(ORC_VENDOR_DEPENDENCIES) + set(ORC_SYSTEM_DEPENDENCIES) + set(ORC_INSTALL_INTERFACE_TARGETS) + ++set(BUILD_POSITION_INDEPENDENT_LIB ON) ++ + set(ORC_FORMAT_VERSION "1.0.0") + set(LZ4_VERSION "1.10.0") + set(SNAPPY_VERSION "1.2.1")
