This is an automated email from the ASF dual-hosted git repository.
gangwu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 7fc8222e6f GH-45225: [C++] Upgrade ORC to 2.1.0 (#45226)
7fc8222e6f is described below
commit 7fc8222e6f8f55392a451cc328a6d953bf084419
Author: Gang Wu <[email protected]>
AuthorDate: Tue Jan 14 10:25:08 2025 +0800
GH-45225: [C++] Upgrade ORC to 2.1.0 (#45226)
### Rationale for this change
Apache ORC has just released 2.1.0:
https://orc.apache.org/news/2025/01/09/ORC-2.1.0/
We need to upgrade it to avoid occasional download failures of orc-format.
### What changes are included in this PR?
Bump Apache ORC to its latest version 2.1.0.
### Are these changes tested?
Pass CIs.
### Are there any user-facing changes?
No.
* GitHub Issue: #45225
Lead-authored-by: Gang Wu <[email protected]>
Co-authored-by: Sutou Kouhei <[email protected]>
Signed-off-by: Gang Wu <[email protected]>
---
ci/docker/debian-12-cpp.dockerfile | 1 +
ci/docker/ubuntu-20.04-cpp.dockerfile | 1 +
ci/docker/ubuntu-22.04-cpp.dockerfile | 1 +
ci/docker/ubuntu-24.04-cpp.dockerfile | 1 +
cpp/cmake_modules/ThirdpartyToolchain.cmake | 29 +--
cpp/cmake_modules/orc.diff | 289 ++++++++++++++++++++++++++++
cpp/src/arrow/adapters/orc/adapter_test.cc | 8 +-
cpp/thirdparty/versions.txt | 4 +-
python/pyarrow/tests/test_orc.py | 8 +-
9 files changed, 319 insertions(+), 23 deletions(-)
diff --git a/ci/docker/debian-12-cpp.dockerfile b/ci/docker/debian-12-cpp.dockerfile
index f486d07ff8..fe3976248c 100644
--- a/ci/docker/debian-12-cpp.dockerfile
+++ b/ci/docker/debian-12-cpp.dockerfile
@@ -84,6 +84,7 @@ RUN apt-get update -y -q && \
ninja-build \
nlohmann-json3-dev \
npm \
+ patch \
pkg-config \
protobuf-compiler-grpc \
python3-dev \
diff --git a/ci/docker/ubuntu-20.04-cpp.dockerfile b/ci/docker/ubuntu-20.04-cpp.dockerfile
index 8dc778d544..259c5fb77f 100644
--- a/ci/docker/ubuntu-20.04-cpp.dockerfile
+++ b/ci/docker/ubuntu-20.04-cpp.dockerfile
@@ -106,6 +106,7 @@ RUN apt-get update -y -q && \
ninja-build \
nlohmann-json3-dev \
npm \
+ patch \
pkg-config \
protobuf-compiler \
python3-dev \
diff --git a/ci/docker/ubuntu-22.04-cpp.dockerfile b/ci/docker/ubuntu-22.04-cpp.dockerfile
index 28cef29463..721b37dcae 100644
--- a/ci/docker/ubuntu-22.04-cpp.dockerfile
+++ b/ci/docker/ubuntu-22.04-cpp.dockerfile
@@ -111,6 +111,7 @@ RUN apt-get update -y -q && \
ninja-build \
nlohmann-json3-dev \
npm \
+ patch \
pkg-config \
protobuf-compiler \
protobuf-compiler-grpc \
diff --git a/ci/docker/ubuntu-24.04-cpp.dockerfile b/ci/docker/ubuntu-24.04-cpp.dockerfile
index 3f486b09f9..592a9a6a23 100644
--- a/ci/docker/ubuntu-24.04-cpp.dockerfile
+++ b/ci/docker/ubuntu-24.04-cpp.dockerfile
@@ -111,6 +111,7 @@ RUN apt-get update -y -q && \
ninja-build \
nlohmann-json3-dev \
npm \
+ patch \
pkg-config \
protobuf-compiler \
protobuf-compiler-grpc \
diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index abfe6d274f..f9459f4175 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -4573,11 +4573,16 @@ target_include_directories(arrow::hadoop INTERFACE "${HADOOP_HOME}/include")
function(build_orc)
message(STATUS "Building Apache ORC from source")
+ # Remove this and "patch" in "ci/docker/{debian,ubuntu}-*.dockerfile" once we have a patch for ORC 2.1.1
+ find_program(PATCH patch REQUIRED)
+ set(ORC_PATCH_COMMAND ${PATCH} -p1 -i ${CMAKE_CURRENT_LIST_DIR}/orc.diff)
+
if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.29)
fetchcontent_declare(orc
${FC_DECLARE_COMMON_OPTIONS}
URL ${ORC_SOURCE_URL}
- URL_HASH "SHA256=${ARROW_ORC_BUILD_SHA256_CHECKSUM}")
+ URL_HASH "SHA256=${ARROW_ORC_BUILD_SHA256_CHECKSUM}"
+ PATCH_COMMAND ${ORC_PATCH_COMMAND})
prepare_fetchcontent()
set(CMAKE_UNITY_BUILD FALSE)
@@ -4667,16 +4672,10 @@ function(build_orc)
OFF
CACHE BOOL "" FORCE)
- # We can remove this with ORC 2.0.2 or later.
- list(PREPEND CMAKE_MODULE_PATH
- ${CMAKE_CURRENT_BINARY_DIR}/_deps/orc-src/cmake_modules)
-
fetchcontent_makeavailable(orc)
add_library(orc::orc INTERFACE IMPORTED)
target_link_libraries(orc::orc INTERFACE orc)
- target_include_directories(orc::orc INTERFACE "${orc_BINARY_DIR}/c++/include"
- "${orc_SOURCE_DIR}/c++/include")
list(APPEND ARROW_BUNDLED_STATIC_LIBS orc)
else()
@@ -4701,6 +4700,9 @@ function(build_orc)
get_target_property(ORC_ZSTD_ROOT ${ARROW_ZSTD_LIBZSTD}
INTERFACE_INCLUDE_DIRECTORIES)
get_filename_component(ORC_ZSTD_ROOT "${ORC_ZSTD_ROOT}" DIRECTORY)
+ get_target_property(ORC_ZLIB_ROOT ZLIB::ZLIB INTERFACE_INCLUDE_DIRECTORIES)
+ get_filename_component(ORC_ZLIB_ROOT "${ORC_ZLIB_ROOT}" DIRECTORY)
+
set(ORC_CMAKE_ARGS
${EP_COMMON_CMAKE_ARGS}
"-DCMAKE_INSTALL_PREFIX=${ORC_PREFIX}"
@@ -4710,7 +4712,6 @@ function(build_orc)
-DBUILD_TOOLS=OFF
-DBUILD_CPP_TESTS=OFF
-DINSTALL_VENDORED_LIBS=OFF
- "-DLZ4_HOME=${ORC_LZ4_ROOT}"
"-DPROTOBUF_EXECUTABLE=$<TARGET_FILE:${ARROW_PROTOBUF_PROTOC}>"
"-DPROTOBUF_HOME=${ORC_PROTOBUF_ROOT}"
"-DPROTOBUF_INCLUDE_DIR=$<TARGET_PROPERTY:${ARROW_PROTOBUF_LIBPROTOBUF},INTERFACE_INCLUDE_DIRECTORIES>"
@@ -4718,16 +4719,17 @@ function(build_orc)
"-DPROTOC_LIBRARY=$<TARGET_FILE:${ARROW_PROTOBUF_LIBPROTOC}>"
"-DSNAPPY_HOME=${ORC_SNAPPY_ROOT}"
"-DSNAPPY_LIBRARY=$<TARGET_FILE:${Snappy_TARGET}>"
+ "-DLZ4_HOME=${ORC_LZ4_ROOT}"
"-DLZ4_LIBRARY=$<TARGET_FILE:LZ4::lz4>"
"-DLZ4_STATIC_LIB=$<TARGET_FILE:LZ4::lz4>"
"-DLZ4_INCLUDE_DIR=${ORC_LZ4_ROOT}/include"
"-DSNAPPY_INCLUDE_DIR=${ORC_SNAPPY_INCLUDE_DIR}"
"-DZSTD_HOME=${ORC_ZSTD_ROOT}"
"-DZSTD_INCLUDE_DIR=$<TARGET_PROPERTY:${ARROW_ZSTD_LIBZSTD},INTERFACE_INCLUDE_DIRECTORIES>"
- "-DZSTD_LIBRARY=$<TARGET_FILE:${ARROW_ZSTD_LIBZSTD}>")
- if(ZLIB_ROOT)
- set(ORC_CMAKE_ARGS ${ORC_CMAKE_ARGS} "-DZLIB_HOME=${ZLIB_ROOT}")
- endif()
+ "-DZSTD_LIBRARY=$<TARGET_FILE:${ARROW_ZSTD_LIBZSTD}>"
+ "-DZLIB_HOME=${ORC_ZLIB_ROOT}"
+ "-DZLIB_INCLUDE_DIR=$<TARGET_PROPERTY:ZLIB::ZLIB,INTERFACE_INCLUDE_DIRECTORIES>"
+ "-DZLIB_LIBRARY=$<TARGET_FILE:ZLIB::ZLIB>")
# Work around CMake bug
file(MAKE_DIRECTORY ${ORC_INCLUDE_DIR})
@@ -4743,7 +4745,8 @@ function(build_orc)
${ARROW_ZSTD_LIBZSTD}
${Snappy_TARGET}
LZ4::lz4
- ZLIB::ZLIB)
+ ZLIB::ZLIB
+ PATCH_COMMAND ${ORC_PATCH_COMMAND})
add_library(orc::orc STATIC IMPORTED)
set_target_properties(orc::orc PROPERTIES IMPORTED_LOCATION
"${ORC_STATIC_LIB}")
target_include_directories(orc::orc BEFORE INTERFACE "${ORC_INCLUDE_DIR}")
diff --git a/cpp/cmake_modules/orc.diff b/cpp/cmake_modules/orc.diff
new file mode 100644
index 0000000000..7bdbfa1cf5
--- /dev/null
+++ b/cpp/cmake_modules/orc.diff
@@ -0,0 +1,289 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+diff --git a/CMakeLists.txt b/CMakeLists.txt
+index 1f8931508..f8e57bf5f 100644
+--- a/CMakeLists.txt
++++ b/CMakeLists.txt
+@@ -30,8 +30,8 @@ SET(CPACK_PACKAGE_VERSION_MAJOR "2")
+ SET(CPACK_PACKAGE_VERSION_MINOR "1")
+ SET(CPACK_PACKAGE_VERSION_PATCH "0")
+ SET(ORC_VERSION "${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}")
+-set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules")
+ set(CMAKE_EXPORT_COMPILE_COMMANDS ON) # For clang-tidy.
++list(PREPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules")
+
+ option (BUILD_JAVA
+ "Include ORC Java library in the build process"
+@@ -225,5 +225,3 @@ if (BUILD_CPP_TESTS)
+ )
+ endif ()
+ endif ()
+-
+-INCLUDE(CheckFormat)
+diff --git a/c++/src/CMakeLists.txt b/c++/src/CMakeLists.txt
+index 694667c06..af13a94aa 100644
+--- a/c++/src/CMakeLists.txt
++++ b/c++/src/CMakeLists.txt
+@@ -218,8 +218,8 @@ target_include_directories (orc
+ INTERFACE
+ $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
+ PUBLIC
+- $<BUILD_INTERFACE:${CMAKE_SOURCE_DIR}/c++/include>
+- $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/c++/include>
++ $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/c++/include>
++ $<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/c++/include>
+ PRIVATE
+ ${CMAKE_CURRENT_SOURCE_DIR}
+ ${CMAKE_CURRENT_BINARY_DIR}
+diff --git a/cmake_modules/ThirdpartyToolchain.cmake b/cmake_modules/ThirdpartyToolchain.cmake
+index 017e6c5b8..fe376ed16 100644
+--- a/cmake_modules/ThirdpartyToolchain.cmake
++++ b/cmake_modules/ThirdpartyToolchain.cmake
+@@ -103,13 +103,13 @@ endif ()
+
+ # ----------------------------------------------------------------------
+ # Macros for adding third-party libraries
+-macro (add_resolved_library target_name link_lib include_dir)
+- add_library (${target_name} INTERFACE IMPORTED)
++macro (orc_add_resolved_library target_name link_lib include_dir)
++ add_library (${target_name} INTERFACE IMPORTED GLOBAL)
+ target_link_libraries (${target_name} INTERFACE ${link_lib})
+ target_include_directories (${target_name} SYSTEM INTERFACE ${include_dir})
+ endmacro ()
+
+-macro (add_built_library external_project_name target_name link_lib include_dir)
++macro (orc_add_built_library external_project_name target_name link_lib include_dir)
+ file (MAKE_DIRECTORY "${include_dir}")
+
+ add_library (${target_name} STATIC IMPORTED)
+@@ -122,7 +122,7 @@ macro (add_built_library external_project_name target_name link_lib include_dir)
+ endif ()
+ endmacro ()
+
+-function(provide_cmake_module MODULE_NAME)
++function(orc_provide_cmake_module MODULE_NAME)
+ set(module "${CMAKE_SOURCE_DIR}/cmake_modules/${MODULE_NAME}.cmake")
+ if(EXISTS "${module}")
+ message(STATUS "Providing CMake module for ${MODULE_NAME} as part of CMake package")
+@@ -130,8 +130,8 @@ function(provide_cmake_module MODULE_NAME)
+ endif()
+ endfunction()
+
+-function(provide_find_module PACKAGE_NAME)
+- provide_cmake_module("Find${PACKAGE_NAME}")
++function(orc_provide_find_module PACKAGE_NAME)
++ orc_provide_cmake_module("Find${PACKAGE_NAME}")
+ endfunction()
+
+ # ----------------------------------------------------------------------
+@@ -156,7 +156,7 @@ ExternalProject_Add (orc-format_ep
+ # Snappy
+ if (ORC_PACKAGE_KIND STREQUAL "conan")
+ find_package (Snappy REQUIRED CONFIG)
+- add_resolved_library (orc_snappy ${Snappy_LIBRARIES} ${Snappy_INCLUDE_DIR})
++ orc_add_resolved_library (orc_snappy ${Snappy_LIBRARIES}
${Snappy_INCLUDE_DIR})
+ list (APPEND ORC_SYSTEM_DEPENDENCIES Snappy)
+ list (APPEND ORC_INSTALL_INTERFACE_TARGETS
"$<INSTALL_INTERFACE:Snappy::snappy>")
+ elseif (ORC_PACKAGE_KIND STREQUAL "vcpkg")
+@@ -168,13 +168,13 @@ elseif (ORC_PACKAGE_KIND STREQUAL "vcpkg")
+ elseif (NOT "${SNAPPY_HOME}" STREQUAL "")
+ find_package (Snappy REQUIRED)
+ if (ORC_PREFER_STATIC_SNAPPY AND SNAPPY_STATIC_LIB)
+- add_resolved_library (orc_snappy ${SNAPPY_STATIC_LIB}
${SNAPPY_INCLUDE_DIR})
++ orc_add_resolved_library (orc_snappy ${SNAPPY_STATIC_LIB}
${SNAPPY_INCLUDE_DIR})
+ else ()
+- add_resolved_library (orc_snappy ${SNAPPY_LIBRARY} ${SNAPPY_INCLUDE_DIR})
++ orc_add_resolved_library (orc_snappy ${SNAPPY_LIBRARY}
${SNAPPY_INCLUDE_DIR})
+ endif ()
+ list (APPEND ORC_SYSTEM_DEPENDENCIES Snappy)
+ list (APPEND ORC_INSTALL_INTERFACE_TARGETS
"$<INSTALL_INTERFACE:Snappy::snappy>")
+- provide_find_module (Snappy)
++ orc_provide_find_module (Snappy)
+ else ()
+ set(SNAPPY_HOME "${THIRDPARTY_DIR}/snappy_ep-install")
+ set(SNAPPY_INCLUDE_DIR "${SNAPPY_HOME}/include")
+@@ -194,7 +194,7 @@ else ()
+ ${THIRDPARTY_LOG_OPTIONS}
+ BUILD_BYPRODUCTS "${SNAPPY_STATIC_LIB}")
+
+- add_built_library (snappy_ep orc_snappy ${SNAPPY_STATIC_LIB}
${SNAPPY_INCLUDE_DIR})
++ orc_add_built_library (snappy_ep orc_snappy ${SNAPPY_STATIC_LIB}
${SNAPPY_INCLUDE_DIR})
+
+ list (APPEND ORC_VENDOR_DEPENDENCIES
"orc::vendored_snappy|${SNAPPY_STATIC_LIB_NAME}")
+ list (APPEND ORC_INSTALL_INTERFACE_TARGETS
"$<INSTALL_INTERFACE:orc::vendored_snappy>")
+@@ -207,7 +207,7 @@ add_library (orc::snappy ALIAS orc_snappy)
+
+ if (ORC_PACKAGE_KIND STREQUAL "conan")
+ find_package (ZLIB REQUIRED CONFIG)
+- add_resolved_library (orc_zlib ${ZLIB_LIBRARIES} ${ZLIB_INCLUDE_DIR})
++ orc_add_resolved_library (orc_zlib ${ZLIB_LIBRARIES} ${ZLIB_INCLUDE_DIR})
+ list (APPEND ORC_SYSTEM_DEPENDENCIES ZLIB)
+ list (APPEND ORC_INSTALL_INTERFACE_TARGETS
"$<INSTALL_INTERFACE:ZLIB::ZLIB>")
+ elseif (ORC_PACKAGE_KIND STREQUAL "vcpkg")
+@@ -219,13 +219,13 @@ elseif (ORC_PACKAGE_KIND STREQUAL "vcpkg")
+ elseif (NOT "${ZLIB_HOME}" STREQUAL "")
+ find_package (ZLIB REQUIRED)
+ if (ORC_PREFER_STATIC_ZLIB AND ZLIB_STATIC_LIB)
+- add_resolved_library (orc_zlib ${ZLIB_STATIC_LIB} ${ZLIB_INCLUDE_DIR})
++ orc_add_resolved_library (orc_zlib ${ZLIB_STATIC_LIB} ${ZLIB_INCLUDE_DIR})
+ else ()
+- add_resolved_library (orc_zlib ${ZLIB_LIBRARY} ${ZLIB_INCLUDE_DIR})
++ orc_add_resolved_library (orc_zlib ${ZLIB_LIBRARY} ${ZLIB_INCLUDE_DIR})
+ endif ()
+ list (APPEND ORC_SYSTEM_DEPENDENCIES ZLIB)
+ list (APPEND ORC_INSTALL_INTERFACE_TARGETS
"$<INSTALL_INTERFACE:ZLIB::ZLIB>")
+- provide_find_module (ZLIB)
++ orc_provide_find_module (ZLIB)
+ else ()
+ set(ZLIB_PREFIX "${THIRDPARTY_DIR}/zlib_ep-install")
+ set(ZLIB_INCLUDE_DIR "${ZLIB_PREFIX}/include")
+@@ -252,7 +252,7 @@ else ()
+ ${THIRDPARTY_LOG_OPTIONS}
+ BUILD_BYPRODUCTS "${ZLIB_STATIC_LIB}")
+
+- add_built_library (zlib_ep orc_zlib ${ZLIB_STATIC_LIB} ${ZLIB_INCLUDE_DIR})
++ orc_add_built_library (zlib_ep orc_zlib ${ZLIB_STATIC_LIB}
${ZLIB_INCLUDE_DIR})
+
+ list (APPEND ORC_VENDOR_DEPENDENCIES
"orc::vendored_zlib|${ZLIB_STATIC_LIB_NAME}")
+ list (APPEND ORC_INSTALL_INTERFACE_TARGETS
"$<INSTALL_INTERFACE:orc::vendored_zlib>")
+@@ -265,7 +265,7 @@ add_library (orc::zlib ALIAS orc_zlib)
+
+ if (ORC_PACKAGE_KIND STREQUAL "conan")
+ find_package (ZSTD REQUIRED CONFIG)
+- add_resolved_library (orc_zstd ${zstd_LIBRARIES} ${zstd_INCLUDE_DIR})
++ orc_add_resolved_library (orc_zstd ${zstd_LIBRARIES} ${zstd_INCLUDE_DIR})
+ list (APPEND ORC_SYSTEM_DEPENDENCIES ZSTD)
+ list (APPEND ORC_INSTALL_INTERFACE_TARGETS
"$<INSTALL_INTERFACE:$<IF:$<TARGET_EXISTS:zstd::libzstd_shared>,zstd::libzstd_shared,zstd::libzstd_static>>")
+ elseif (ORC_PACKAGE_KIND STREQUAL "vcpkg")
+@@ -277,14 +277,14 @@ elseif (ORC_PACKAGE_KIND STREQUAL "vcpkg")
+ elseif (NOT "${ZSTD_HOME}" STREQUAL "")
+ find_package (ZSTD REQUIRED)
+ if (ORC_PREFER_STATIC_ZSTD AND ZSTD_STATIC_LIB)
+- add_resolved_library (orc_zstd ${ZSTD_STATIC_LIB} ${ZSTD_INCLUDE_DIR})
++ orc_add_resolved_library (orc_zstd ${ZSTD_STATIC_LIB} ${ZSTD_INCLUDE_DIR})
+ list (APPEND ORC_INSTALL_INTERFACE_TARGETS
"$<INSTALL_INTERFACE:zstd::libzstd_static>")
+ else ()
+- add_resolved_library (orc_zstd ${ZSTD_LIBRARY} ${ZSTD_INCLUDE_DIR})
++ orc_add_resolved_library (orc_zstd ${ZSTD_LIBRARY} ${ZSTD_INCLUDE_DIR})
+ list (APPEND ORC_INSTALL_INTERFACE_TARGETS
"$<INSTALL_INTERFACE:$<IF:$<TARGET_EXISTS:zstd::libzstd_shared>,zstd::libzstd_shared,zstd::libzstd_static>>")
+ endif ()
+ list (APPEND ORC_SYSTEM_DEPENDENCIES ZSTD)
+- provide_find_module (ZSTD)
++ orc_provide_find_module (ZSTD)
+ else ()
+ set(ZSTD_HOME "${THIRDPARTY_DIR}/zstd_ep-install")
+ set(ZSTD_INCLUDE_DIR "${ZSTD_HOME}/include")
+@@ -318,7 +318,7 @@ else ()
+ ${THIRDPARTY_LOG_OPTIONS}
+ BUILD_BYPRODUCTS ${ZSTD_STATIC_LIB})
+
+- add_built_library (zstd_ep orc_zstd ${ZSTD_STATIC_LIB} ${ZSTD_INCLUDE_DIR})
++ orc_add_built_library (zstd_ep orc_zstd ${ZSTD_STATIC_LIB}
${ZSTD_INCLUDE_DIR})
+
+ list (APPEND ORC_VENDOR_DEPENDENCIES
"orc::vendored_zstd|${ZSTD_STATIC_LIB_NAME}")
+ list (APPEND ORC_INSTALL_INTERFACE_TARGETS
"$<INSTALL_INTERFACE:orc::vendored_zstd>")
+@@ -330,7 +330,7 @@ add_library (orc::zstd ALIAS orc_zstd)
+ # LZ4
+ if (ORC_PACKAGE_KIND STREQUAL "conan")
+ find_package (LZ4 REQUIRED CONFIG)
+- add_resolved_library (orc_lz4 ${lz4_LIBRARIES} ${lz4_INCLUDE_DIR})
++ orc_add_resolved_library (orc_lz4 ${lz4_LIBRARIES} ${lz4_INCLUDE_DIR})
+ list (APPEND ORC_SYSTEM_DEPENDENCIES LZ4)
+ list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$<INSTALL_INTERFACE:LZ4::lz4>")
+ elseif (ORC_PACKAGE_KIND STREQUAL "vcpkg")
+@@ -342,13 +342,13 @@ elseif (ORC_PACKAGE_KIND STREQUAL "vcpkg")
+ elseif (NOT "${LZ4_HOME}" STREQUAL "")
+ find_package (LZ4 REQUIRED)
+ if (ORC_PREFER_STATIC_LZ4 AND LZ4_STATIC_LIB)
+- add_resolved_library (orc_lz4 ${LZ4_STATIC_LIB} ${LZ4_INCLUDE_DIR})
++ orc_add_resolved_library (orc_lz4 ${LZ4_STATIC_LIB} ${LZ4_INCLUDE_DIR})
+ else ()
+- add_resolved_library (orc_lz4 ${LZ4_LIBRARY} ${LZ4_INCLUDE_DIR})
++ orc_add_resolved_library (orc_lz4 ${LZ4_LIBRARY} ${LZ4_INCLUDE_DIR})
+ endif ()
+ list (APPEND ORC_SYSTEM_DEPENDENCIES LZ4)
+ list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$<INSTALL_INTERFACE:LZ4::lz4>")
+- provide_find_module (LZ4)
++ orc_provide_find_module (LZ4)
+ else ()
+ set(LZ4_PREFIX "${THIRDPARTY_DIR}/lz4_ep-install")
+ set(LZ4_INCLUDE_DIR "${LZ4_PREFIX}/include")
+@@ -375,7 +375,7 @@ else ()
+ ${THIRDPARTY_LOG_OPTIONS}
+ BUILD_BYPRODUCTS ${LZ4_STATIC_LIB})
+
+- add_built_library (lz4_ep orc_lz4 ${LZ4_STATIC_LIB} ${LZ4_INCLUDE_DIR})
++ orc_add_built_library (lz4_ep orc_lz4 ${LZ4_STATIC_LIB} ${LZ4_INCLUDE_DIR})
+
+ list (APPEND ORC_VENDOR_DEPENDENCIES
"orc::vendored_lz4|${LZ4_STATIC_LIB_NAME}")
+ list (APPEND ORC_INSTALL_INTERFACE_TARGETS
"$<INSTALL_INTERFACE:orc::vendored_lz4>")
+@@ -491,7 +491,7 @@ endif ()
+
+ if (ORC_PACKAGE_KIND STREQUAL "conan")
+ find_package (Protobuf REQUIRED CONFIG)
+- add_resolved_library (orc_protobuf ${protobuf_LIBRARIES}
${protobuf_INCLUDE_DIR})
++ orc_add_resolved_library (orc_protobuf ${protobuf_LIBRARIES}
${protobuf_INCLUDE_DIR})
+ list (APPEND ORC_SYSTEM_DEPENDENCIES Protobuf)
+ list (APPEND ORC_INSTALL_INTERFACE_TARGETS
"$<INSTALL_INTERFACE:protobuf::libprotobuf>")
+ elseif (ORC_PACKAGE_KIND STREQUAL "vcpkg")
+@@ -505,20 +505,20 @@ elseif (NOT "${PROTOBUF_HOME}" STREQUAL "")
+ find_package (Protobuf REQUIRED)
+
+ if (ORC_PREFER_STATIC_PROTOBUF AND PROTOBUF_STATIC_LIB)
+- add_resolved_library (orc_protobuf ${PROTOBUF_STATIC_LIB}
${PROTOBUF_INCLUDE_DIR})
++ orc_add_resolved_library (orc_protobuf ${PROTOBUF_STATIC_LIB}
${PROTOBUF_INCLUDE_DIR})
+ else ()
+- add_resolved_library (orc_protobuf ${PROTOBUF_LIBRARY}
${PROTOBUF_INCLUDE_DIR})
++ orc_add_resolved_library (orc_protobuf ${PROTOBUF_LIBRARY}
${PROTOBUF_INCLUDE_DIR})
+ endif ()
+
+ if (ORC_PREFER_STATIC_PROTOBUF AND PROTOC_STATIC_LIB)
+- add_resolved_library (orc_protoc ${PROTOC_STATIC_LIB}
${PROTOBUF_INCLUDE_DIR})
++ orc_add_resolved_library (orc_protoc ${PROTOC_STATIC_LIB}
${PROTOBUF_INCLUDE_DIR})
+ else ()
+- add_resolved_library (orc_protoc ${PROTOC_LIBRARY}
${PROTOBUF_INCLUDE_DIR})
++ orc_add_resolved_library (orc_protoc ${PROTOC_LIBRARY}
${PROTOBUF_INCLUDE_DIR})
+ endif ()
+
+ list (APPEND ORC_SYSTEM_DEPENDENCIES Protobuf)
+ list (APPEND ORC_INSTALL_INTERFACE_TARGETS
"$<INSTALL_INTERFACE:protobuf::libprotobuf>")
+- provide_find_module (Protobuf)
++ orc_provide_find_module (Protobuf)
+ else ()
+ set(PROTOBUF_PREFIX "${THIRDPARTY_DIR}/protobuf_ep-install")
+ set(PROTOBUF_INCLUDE_DIR "${PROTOBUF_PREFIX}/include")
+@@ -556,8 +556,8 @@ else ()
+ ${THIRDPARTY_LOG_OPTIONS}
+ BUILD_BYPRODUCTS "${PROTOBUF_STATIC_LIB}" "${PROTOC_STATIC_LIB}")
+
+- add_built_library (protobuf_ep orc_protobuf ${PROTOBUF_STATIC_LIB}
${PROTOBUF_INCLUDE_DIR})
+- add_built_library (protobuf_ep orc_protoc ${PROTOC_STATIC_LIB}
${PROTOBUF_INCLUDE_DIR})
++ orc_add_built_library (protobuf_ep orc_protobuf ${PROTOBUF_STATIC_LIB}
${PROTOBUF_INCLUDE_DIR})
++ orc_add_built_library (protobuf_ep orc_protoc ${PROTOC_STATIC_LIB}
${PROTOBUF_INCLUDE_DIR})
+
+ list (APPEND ORC_VENDOR_DEPENDENCIES
"orc::vendored_protobuf|${PROTOBUF_STATIC_LIB_NAME}")
+ list (APPEND ORC_INSTALL_INTERFACE_TARGETS
"$<INSTALL_INTERFACE:orc::vendored_protobuf>")
+@@ -610,7 +610,7 @@ if(BUILD_LIBHDFSPP)
+ BUILD_BYPRODUCTS "${LIBHDFSPP_STATIC_LIB}"
+ CMAKE_ARGS ${LIBHDFSPP_CMAKE_ARGS})
+
+- add_built_library(libhdfspp_ep libhdfspp ${LIBHDFSPP_STATIC_LIB}
${LIBHDFSPP_INCLUDE_DIR})
++ orc_add_built_library(libhdfspp_ep libhdfspp ${LIBHDFSPP_STATIC_LIB}
${LIBHDFSPP_INCLUDE_DIR})
+
+ set (LIBHDFSPP_LIBRARIES
+ libhdfspp
diff --git a/cpp/src/arrow/adapters/orc/adapter_test.cc b/cpp/src/arrow/adapters/orc/adapter_test.cc
index b9d6c53215..b3c314fccc 100644
--- a/cpp/src/arrow/adapters/orc/adapter_test.cc
+++ b/cpp/src/arrow/adapters/orc/adapter_test.cc
@@ -235,7 +235,7 @@ void AssertTableWriteReadEqual(const std::vector<std::shared_ptr<Table>>& input_
write_options.compression = Compression::UNCOMPRESSED;
#endif
write_options.file_version = adapters::orc::FileVersion(0, 11);
- write_options.compression_block_size = 32768;
+ write_options.compression_block_size = 64 * 1024;
write_options.row_index_stride = 5000;
EXPECT_OK_AND_ASSIGN(auto writer, adapters::orc::ORCFileWriter::Open(
buffer_output_stream.get(),
write_options));
@@ -272,7 +272,7 @@ void AssertBatchWriteReadEqual(
write_options.compression = Compression::UNCOMPRESSED;
#endif
write_options.file_version = adapters::orc::FileVersion(0, 11);
- write_options.compression_block_size = 32768;
+ write_options.compression_block_size = 64 * 1024;
write_options.row_index_stride = 5000;
EXPECT_OK_AND_ASSIGN(auto writer, adapters::orc::ORCFileWriter::Open(
buffer_output_stream.get(),
write_options));
@@ -330,7 +330,7 @@ std::unique_ptr<liborc::Writer> CreateWriter(uint64_t
stripe_size,
liborc::OutputStream* stream) {
liborc::WriterOptions options;
options.setStripeSize(stripe_size);
- options.setCompressionBlockSize(1024);
+ options.setCompressionBlockSize(64 * 1024);
options.setMemoryPool(liborc::getDefaultPool());
options.setRowIndexStride(0);
return liborc::createWriter(type, stream, options);
@@ -668,7 +668,7 @@ TEST_F(TestORCWriterTrivialNoWrite, noWrite) {
write_options.compression = Compression::UNCOMPRESSED;
#endif
write_options.file_version = adapters::orc::FileVersion(0, 11);
- write_options.compression_block_size = 32768;
+ write_options.compression_block_size = 64 * 1024;
write_options.row_index_stride = 5000;
EXPECT_OK_AND_ASSIGN(auto writer, adapters::orc::ORCFileWriter::Open(
buffer_output_stream.get(),
write_options));
diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt
index 53d2034600..29f0cc7d1b 100644
--- a/cpp/thirdparty/versions.txt
+++ b/cpp/thirdparty/versions.txt
@@ -90,8 +90,8 @@ ARROW_OPENTELEMETRY_BUILD_VERSION=v1.13.0
ARROW_OPENTELEMETRY_BUILD_SHA256_CHECKSUM=7735cc56507149686e6019e06f588317099d4522480be5f38a2a09ec69af1706
ARROW_OPENTELEMETRY_PROTO_BUILD_VERSION=v0.17.0
ARROW_OPENTELEMETRY_PROTO_BUILD_SHA256_CHECKSUM=f269fbcb30e17b03caa1decd231ce826e59d7651c0f71c3b28eb5140b4bb5412
-ARROW_ORC_BUILD_VERSION=2.0.3
-ARROW_ORC_BUILD_SHA256_CHECKSUM=082cba862b5a8a0d14c225404d0b51cd8d1b64ca81b8f1e500322ce8922cb86d
+ARROW_ORC_BUILD_VERSION=2.1.0
+ARROW_ORC_BUILD_SHA256_CHECKSUM=69d45665bfb5699b709094ba630ae4b186b19e083c4438855fc29c77125c149c
ARROW_PROTOBUF_BUILD_VERSION=v21.3
ARROW_PROTOBUF_BUILD_SHA256_CHECKSUM=2f723218f6cb709ae4cdc4fb5ed56a5951fc5d466f0128ce4c946b8c78c8c49f
# Because of https://github.com/Tencent/rapidjson/pull/1323, we require
diff --git a/python/pyarrow/tests/test_orc.py b/python/pyarrow/tests/test_orc.py
index b0f9e813b1..706fb3fe45 100644
--- a/python/pyarrow/tests/test_orc.py
+++ b/python/pyarrow/tests/test_orc.py
@@ -334,7 +334,7 @@ def test_buffer_readwrite_with_writeoptions():
compression='snappy',
file_version='0.11',
row_index_stride=5000,
- compression_block_size=32768,
+ compression_block_size=65536,
)
buffer_reader = pa.BufferReader(buffer_output_stream.getvalue())
orc_file = orc.ORCFile(buffer_reader)
@@ -344,7 +344,7 @@ def test_buffer_readwrite_with_writeoptions():
assert orc_file.compression == 'SNAPPY'
assert orc_file.file_version == '0.11'
assert orc_file.row_index_stride == 5000
- assert orc_file.compression_size == 32768
+ assert orc_file.compression_size == 65536
# deprecated keyword order
buffer_output_stream = pa.BufferOutputStream()
@@ -355,7 +355,7 @@ def test_buffer_readwrite_with_writeoptions():
compression='uncompressed',
file_version='0.11',
row_index_stride=20000,
- compression_block_size=16384,
+ compression_block_size=65536,
)
buffer_reader = pa.BufferReader(buffer_output_stream.getvalue())
orc_file = orc.ORCFile(buffer_reader)
@@ -365,7 +365,7 @@ def test_buffer_readwrite_with_writeoptions():
assert orc_file.compression == 'UNCOMPRESSED'
assert orc_file.file_version == '0.11'
assert orc_file.row_index_stride == 20000
- assert orc_file.compression_size == 16384
+ assert orc_file.compression_size == 65536
def test_buffer_readwrite_with_bad_writeoptions():