This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 30809c6f48 GH-45885: [C++] Require C++20 (#48414)
30809c6f48 is described below
commit 30809c6f48a1a74b40dafd6bb27ac4b9aeded93e
Author: Antoine Pitrou <[email protected]>
AuthorDate: Thu Dec 18 17:17:13 2025 +0100
GH-45885: [C++] Require C++20 (#48414)
### Rationale for this change
We decided to migrate Arrow C++ to C++20 in this discussion:
https://lists.apache.org/thread/48zlj0dn2y0f53y2k37qsr90y781wfnj
### What changes are included in this PR?
1. Build configuration updates (CMake files etc.) to build with C++20
instead of C++17
2. C++-level fixes to ensure compilation succeeds:
* Workarounds for deprecation of atomic access on `std::shared_ptr<T>`
(the replacement `std::atomic<std::shared_ptr<T>>` is unfortunately not
supported in all standard library implementations)
* Workaround for [Abseil ABI
issues](https://github.com/abseil/abseil-cpp/issues/1624)
* Trivial update to `arrow/util/string.h` to call a C++20 API (to
validate that C++20 is actually enabled)
3. CI configuration updates to get enough C++20 support on the various
platforms:
* Bump default clang and LLVM versions to 18
* Use AlmaLinux 10 for release verification (AlmaLinux 8 is still
tested on other CI builds)
### Are these changes tested?
Yes.
### Are there any user-facing changes?
Yes, Arrow C++ will now require a C++20-compliant compiler.
* GitHub Issue: #45885
Authored-by: Antoine Pitrou <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
---
.env | 2 +-
.github/workflows/cpp.yml | 12 ++--
.github/workflows/cpp_extra.yml | 1 -
.github/workflows/cpp_windows.yml | 2 +-
.github/workflows/r.yml | 3 +-
.github/workflows/ruby.yml | 2 +-
c_glib/meson.build | 2 +-
.../docker/almalinux-10-verify-rc.dockerfile | 42 ++----------
ci/docker/debian-experimental-cpp.dockerfile | 2 +
ci/scripts/cpp_build.sh | 4 +-
ci/scripts/install_sccache.sh | 6 +-
ci/scripts/r_windows_build.sh | 2 +-
cpp/CMakeLists.txt | 6 +-
cpp/cmake_modules/GandivaAddBitcode.cmake | 2 +-
cpp/cmake_modules/SetupCxxFlags.cmake | 8 +--
cpp/examples/minimal_build/CMakeLists.txt | 4 +-
cpp/examples/minimal_build/run_static.sh | 2 +-
cpp/examples/parquet/parquet_arrow/CMakeLists.txt | 4 +-
cpp/examples/tutorial_examples/CMakeLists.txt | 2 +-
cpp/meson.build | 2 +-
cpp/src/arrow/CMakeLists.txt | 8 +--
cpp/src/arrow/array/array_nested.cc | 80 ++++++++++++++++------
cpp/src/arrow/array/array_nested.h | 18 ++++-
.../compute/kernels/hash_aggregate_numeric.cc | 44 ++++++++----
cpp/src/arrow/filesystem/gcsfs.cc | 3 +-
cpp/src/arrow/filesystem/gcsfs_internal.cc | 5 +-
cpp/src/arrow/filesystem/gcsfs_internal.h | 9 +--
cpp/src/arrow/filesystem/s3fs.cc | 24 +++----
cpp/src/arrow/flight/server_tracing_middleware.cc | 1 -
.../arrow/flight/transport/grpc/customize_grpc.h | 20 ++++++
cpp/src/arrow/flight/transport/grpc/grpc_client.cc | 2 +
cpp/src/arrow/flight/transport/grpc/grpc_server.cc | 2 +
cpp/src/arrow/record_batch.cc | 23 +++----
cpp/src/arrow/util/range.h | 5 +-
cpp/src/arrow/util/string.h | 6 +-
cpp/src/arrow/util/vector.h | 9 ++-
cpp/src/gandiva/engine.cc | 2 +-
cpp/src/gandiva/precompiled/decimal_ops.cc | 3 +-
cpp/thirdparty/versions.txt | 4 +-
dev/release/setup-rhel-rebuilds.sh | 15 +---
dev/release/verify-apt.sh | 2 +-
dev/release/verify-yum.sh | 2 +-
dev/tasks/homebrew-formulae/apache-arrow.rb | 2 +-
.../apache-arrow/yum/almalinux-10/Dockerfile | 1 -
.../apache-arrow/yum/almalinux-8/Dockerfile | 6 +-
.../apache-arrow/yum/almalinux-9/Dockerfile | 1 -
.../linux-packages/apache-arrow/yum/arrow.spec.in | 21 ++++++
dev/tasks/linux-packages/yum/build.sh | 11 +++
dev/tasks/r/github.packages.yml | 2 +
dev/tasks/tasks.yml | 17 +----
dev/tasks/verify-rc/github.macos.yml | 15 +---
docs/source/cpp/conventions.rst | 2 +-
docs/source/developers/cpp/building.rst | 4 +-
docs/source/python/integration/extending.rst | 2 +-
matlab/CMakeLists.txt | 2 +-
matlab/tools/cmake/BuildMatlabArrowInterface.cmake | 2 +-
python/CMakeLists.txt | 2 +-
python/pyarrow/tests/test_cython.py | 4 +-
58 files changed, 277 insertions(+), 214 deletions(-)
diff --git a/.env b/.env
index 6a62cad5fc..dad867f8f6 100644
--- a/.env
+++ b/.env
@@ -57,7 +57,7 @@ FEDORA=42
UBUNTU=22.04
# Default versions for various dependencies
-CLANG_TOOLS=14
+CLANG_TOOLS=18
CMAKE=3.26.0
CUDA=11.7.1
DASK=latest
diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml
index 6586480a81..45a9c3ba77 100644
--- a/.github/workflows/cpp.yml
+++ b/.github/workflows/cpp.yml
@@ -81,24 +81,24 @@ jobs:
matrix:
include:
- arch: amd64
- clang-tools: 14
+ clang-tools: 18
image: conda-cpp
- llvm: 14
+ llvm: 18
runs-on: ubuntu-latest
simd-level: AVX2
title: AMD64 Conda C++ AVX2
ubuntu: 22.04
- arch: amd64
- clang-tools: 14
+ clang-tools: 18
image: ubuntu-cpp-sanitizer
- llvm: 14
+ llvm: 18
runs-on: ubuntu-latest
title: AMD64 Ubuntu 24.04 C++ ASAN UBSAN
ubuntu: 24.04
- arch: arm64v8
- clang-tools: 14
+ clang-tools: 18
image: ubuntu-cpp
- llvm: 14
+ llvm: 18
runs-on: ubuntu-24.04-arm
title: ARM64 Ubuntu 22.04 C++
ubuntu: 22.04
diff --git a/.github/workflows/cpp_extra.yml b/.github/workflows/cpp_extra.yml
index b56b74c9a2..ca5a3adb4b 100644
--- a/.github/workflows/cpp_extra.yml
+++ b/.github/workflows/cpp_extra.yml
@@ -347,7 +347,6 @@ jobs:
ARROW_DEPENDENCY_SOURCE: VCPKG
ARROW_FLIGHT_SQL_ODBC: ON
ARROW_SIMD_LEVEL: AVX2
- CMAKE_CXX_STANDARD: "17"
CMAKE_GENERATOR: Ninja
CMAKE_INSTALL_PREFIX: /usr
VCPKG_BINARY_SOURCES: 'clear;nuget,GitHub,readwrite'
diff --git a/.github/workflows/cpp_windows.yml
b/.github/workflows/cpp_windows.yml
index ae74e4d695..394cd8851c 100644
--- a/.github/workflows/cpp_windows.yml
+++ b/.github/workflows/cpp_windows.yml
@@ -63,7 +63,7 @@ jobs:
ARROW_WITH_ZLIB: ON
ARROW_WITH_ZSTD: ON
BOOST_SOURCE: BUNDLED
- CMAKE_CXX_STANDARD: "17"
+ CMAKE_CXX_STANDARD: "20"
CMAKE_GENERATOR: Ninja
CMAKE_INSTALL_PREFIX: /usr
CMAKE_UNITY_BUILD: ON
diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml
index 25875c7d83..38bccb1f4d 100644
--- a/.github/workflows/r.yml
+++ b/.github/workflows/r.yml
@@ -229,12 +229,13 @@ jobs:
# static library. The R is not used here but R 4.1 was the last R to use
# Rtools40.
r-version: "4.1"
- rtools-version: 40
+ rtools-version: ${{ matrix.config.rtools }}
Ncpus: 2
- name: Build Arrow C++
shell: bash
env:
MINGW_ARCH: ${{ matrix.config.arch }}
+ RTOOLS_VERSION: ${{ matrix.config.rtools }}
run: ci/scripts/r_windows_build.sh
- name: Rename libarrow.zip
# So that they're unique when multiple are downloaded in the next step
diff --git a/.github/workflows/ruby.yml b/.github/workflows/ruby.yml
index 0a9140bfa2..3545f1c16a 100644
--- a/.github/workflows/ruby.yml
+++ b/.github/workflows/ruby.yml
@@ -349,7 +349,7 @@ jobs:
ARROW_WITH_SNAPPY: ON
ARROW_WITH_ZLIB: ON
ARROW_WITH_ZSTD: ON
- CMAKE_CXX_STANDARD: "17"
+ CMAKE_CXX_STANDARD: "20"
CMAKE_GENERATOR: Ninja
CMAKE_INSTALL_PREFIX: "${{ github.workspace }}/dist"
VCPKG_BINARY_SOURCES: 'clear;nuget,GitHub,readwrite'
diff --git a/c_glib/meson.build b/c_glib/meson.build
index 46fc56e7db..e51024f6fa 100644
--- a/c_glib/meson.build
+++ b/c_glib/meson.build
@@ -21,7 +21,7 @@ project(
'arrow-glib',
'c',
'cpp',
- default_options: ['c_std=c99', 'cpp_std=c++17'],
+ default_options: ['c_std=c99', 'cpp_std=c++20'],
license: 'Apache-2.0',
# Debian:
# https://packages.debian.org/search?keywords=meson
diff --git a/dev/release/setup-rhel-rebuilds.sh
b/ci/docker/almalinux-10-verify-rc.dockerfile
old mode 100755
new mode 100644
similarity index 51%
copy from dev/release/setup-rhel-rebuilds.sh
copy to ci/docker/almalinux-10-verify-rc.dockerfile
index d6b335433a..efd77a86d1
--- a/dev/release/setup-rhel-rebuilds.sh
+++ b/ci/docker/almalinux-10-verify-rc.dockerfile
@@ -1,4 +1,3 @@
-#!/bin/sh
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
@@ -16,39 +15,10 @@
# specific language governing permissions and limitations
# under the License.
+ARG arch=amd64
+FROM ${arch}/almalinux:10
-# A script to install dependencies required for release
-# verification Red Hat Enterprise Linux 8 clones in particular
-# on AlmaLinux 8 and Rocky Linux 8
-
-set -exu
-
-dnf -y install 'dnf-command(config-manager)'
-dnf config-manager --set-enabled powertools
-dnf -y update
-dnf -y module disable nodejs
-dnf -y module enable nodejs:18
-dnf -y module disable ruby
-dnf -y module enable ruby:2.7
-dnf -y groupinstall "Development Tools"
-dnf -y install \
- cmake \
- git \
- gobject-introspection-devel \
- libcurl-devel \
- llvm-devel \
- llvm-toolset \
- ncurses-devel \
- ninja-build \
- nodejs \
- openssl-devel \
- python3.12-devel \
- ruby-devel \
- sqlite-devel \
- vala-devel \
- wget \
- which
-
-npm install -g yarn
-
-python3 -m ensurepip --upgrade
+COPY dev/release/setup-rhel-rebuilds.sh /
+RUN /setup-rhel-rebuilds.sh && \
+ rm /setup-rhel-rebuilds.sh && \
+ dnf -y clean all
diff --git a/ci/docker/debian-experimental-cpp.dockerfile
b/ci/docker/debian-experimental-cpp.dockerfile
index 743f5ddd3b..d37b58e230 100644
--- a/ci/docker/debian-experimental-cpp.dockerfile
+++ b/ci/docker/debian-experimental-cpp.dockerfile
@@ -59,6 +59,7 @@ RUN if [ -n "${gcc}" ]; then \
libldap-dev \
liblz4-dev \
libnghttp2-dev \
+ libopentelemetry-proto-dev \
libprotobuf-dev \
libprotoc-dev \
libpsl-dev \
@@ -88,6 +89,7 @@ RUN if [ -n "${gcc}" ]; then \
rapidjson-dev \
rsync \
tzdata \
+ tzdata-legacy \
zlib1g-dev && \
apt-get install -y -q --no-install-recommends -t experimental \
clang${llvm_package_suffix} \
diff --git a/ci/scripts/cpp_build.sh b/ci/scripts/cpp_build.sh
index 2f02f8c149..904b5cccb4 100755
--- a/ci/scripts/cpp_build.sh
+++ b/ci/scripts/cpp_build.sh
@@ -168,7 +168,7 @@ elif [ "${ARROW_EMSCRIPTEN:-OFF}" = "ON" ]; then
-DCMAKE_VERBOSE_MAKEFILE=${CMAKE_VERBOSE_MAKEFILE:-OFF} \
-DCMAKE_C_FLAGS="${CFLAGS:-}" \
-DCMAKE_CXX_FLAGS="${CXXFLAGS:-}" \
- -DCMAKE_CXX_STANDARD="${CMAKE_CXX_STANDARD:-17}" \
+ -DCMAKE_CXX_STANDARD="${CMAKE_CXX_STANDARD:-20}" \
-DCMAKE_INSTALL_LIBDIR=${CMAKE_INSTALL_LIBDIR:-lib} \
-DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX:-${ARROW_HOME}} \
-DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD:-OFF} \
@@ -260,7 +260,7 @@ else
-DCMAKE_VERBOSE_MAKEFILE=${CMAKE_VERBOSE_MAKEFILE:-OFF} \
-DCMAKE_C_FLAGS="${CFLAGS:-}" \
-DCMAKE_CXX_FLAGS="${CXXFLAGS:-}" \
- -DCMAKE_CXX_STANDARD="${CMAKE_CXX_STANDARD:-17}" \
+ -DCMAKE_CXX_STANDARD="${CMAKE_CXX_STANDARD:-20}" \
-DCMAKE_INSTALL_LIBDIR=${CMAKE_INSTALL_LIBDIR:-lib} \
-DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX:-${ARROW_HOME}} \
-DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD:-OFF} \
diff --git a/ci/scripts/install_sccache.sh b/ci/scripts/install_sccache.sh
index c571625a3b..3899c482af 100755
--- a/ci/scripts/install_sccache.sh
+++ b/ci/scripts/install_sccache.sh
@@ -19,15 +19,17 @@
set -e
+DEFAULT_VERSION=0.12.0
+
if [ "$#" -lt 2 ] || [ "$#" -gt 3 ]; then
echo "Usage: $0 <build> <prefix> <version>"
- echo "Will default to version=0.3.0 "
+ echo "Will default to version=${DEFAULT_VERSION}"
exit 1
fi
BUILD=$1
PREFIX=$2
-VERSION=${3:-0.3.0}
+VERSION=${3:-${DEFAULT_VERSION}}
ARCH=$(uname -m)
if [ "${ARCH}" != x86_64 ] && [ "${ARCH}" != aarch64 ]; then
diff --git a/ci/scripts/r_windows_build.sh b/ci/scripts/r_windows_build.sh
index e3b68c941c..ef9c58f6af 100755
--- a/ci/scripts/r_windows_build.sh
+++ b/ci/scripts/r_windows_build.sh
@@ -44,7 +44,7 @@ mv mingw* build
cd build
# This may vary by system/CI provider
-MSYS_LIB_DIR="/c/rtools40"
+MSYS_LIB_DIR="/c/rtools${RTOOLS_VERSION}"
# Untar the builds we made
ls *.xz | xargs -n 1 tar -xJf
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index c9f026f926..5b260c0eb6 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -528,10 +528,10 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}
${ARROW_CXXFLAGS}")
# C++ specific flags.
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${CXX_COMMON_FLAGS} ${ARROW_CXXFLAGS}")
-# Remove --std=c++17 to avoid errors from C compilers
-string(REPLACE "-std=c++17" "" CMAKE_C_FLAGS ${CMAKE_C_FLAGS})
+# Remove -std=c++20 to avoid errors from C compilers
+string(REPLACE "-std=c++20" "" CMAKE_C_FLAGS ${CMAKE_C_FLAGS})
-# Add C++-only flags, like -std=c++17
+# Add C++-only flags, like -std=c++20
set(CMAKE_CXX_FLAGS "${CXX_ONLY_FLAGS} ${CMAKE_CXX_FLAGS}")
include(ThirdpartyToolchain)
diff --git a/cpp/cmake_modules/GandivaAddBitcode.cmake
b/cpp/cmake_modules/GandivaAddBitcode.cmake
index 6b5e5b3e60..b22581b4a1 100644
--- a/cpp/cmake_modules/GandivaAddBitcode.cmake
+++ b/cpp/cmake_modules/GandivaAddBitcode.cmake
@@ -17,7 +17,7 @@
# Create bitcode for the given source file.
function(gandiva_add_bitcode SOURCE)
- set(CLANG_OPTIONS -std=c++17)
+ set(CLANG_OPTIONS -std=c++20)
if(MSVC)
# "19.20" means that it's compatible with Visual Studio 16 2019.
# We can update this to "19.30" when we dropped support for Visual
diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake
b/cpp/cmake_modules/SetupCxxFlags.cmake
index 3c172aebdf..c92daf3269 100644
--- a/cpp/cmake_modules/SetupCxxFlags.cmake
+++ b/cpp/cmake_modules/SetupCxxFlags.cmake
@@ -141,12 +141,12 @@ endif()
# This ensures that things like c++17 get passed correctly
if(NOT DEFINED CMAKE_CXX_STANDARD)
- set(CMAKE_CXX_STANDARD 17)
-elseif(${CMAKE_CXX_STANDARD} VERSION_LESS 17)
- message(FATAL_ERROR "Cannot set a CMAKE_CXX_STANDARD smaller than 17")
+ set(CMAKE_CXX_STANDARD 20)
+elseif(${CMAKE_CXX_STANDARD} VERSION_LESS 20)
+ message(FATAL_ERROR "Cannot set a CMAKE_CXX_STANDARD smaller than 20")
endif()
-# We require a C++17 compliant compiler
+# We require a C++20 compliant compiler
set(CMAKE_CXX_STANDARD_REQUIRED ON)
# ARROW-6848: Do not use GNU (or other CXX) extensions
diff --git a/cpp/examples/minimal_build/CMakeLists.txt
b/cpp/examples/minimal_build/CMakeLists.txt
index 626b987b09..d0a0a1e0a2 100644
--- a/cpp/examples/minimal_build/CMakeLists.txt
+++ b/cpp/examples/minimal_build/CMakeLists.txt
@@ -29,10 +29,10 @@ cmake_dependent_option(ARROW_LINK_SHARED
OFF)
if(NOT DEFINED CMAKE_CXX_STANDARD)
- set(CMAKE_CXX_STANDARD 17)
+ set(CMAKE_CXX_STANDARD 20)
endif()
-# We require a C++17 compliant compiler
+# We require a C++20 compliant compiler
set(CMAKE_CXX_STANDARD_REQUIRED ON)
if(NOT CMAKE_BUILD_TYPE)
diff --git a/cpp/examples/minimal_build/run_static.sh
b/cpp/examples/minimal_build/run_static.sh
index 189f59a007..26019227a7 100755
--- a/cpp/examples/minimal_build/run_static.sh
+++ b/cpp/examples/minimal_build/run_static.sh
@@ -86,7 +86,7 @@ echo
rm -rf $EXAMPLE_BUILD_DIR
mkdir -p $EXAMPLE_BUILD_DIR
-${CXX:-c++} -std=c++17 \
+${CXX:-c++} -std=c++20 \
-o $EXAMPLE_BUILD_DIR/arrow-example \
$EXAMPLE_DIR/example.cc \
$(PKG_CONFIG_PATH=$ARROW_BUILD_DIR/lib/pkgconfig \
diff --git a/cpp/examples/parquet/parquet_arrow/CMakeLists.txt
b/cpp/examples/parquet/parquet_arrow/CMakeLists.txt
index 189d17914d..6c3dac8f2c 100644
--- a/cpp/examples/parquet/parquet_arrow/CMakeLists.txt
+++ b/cpp/examples/parquet/parquet_arrow/CMakeLists.txt
@@ -28,10 +28,10 @@ option(PARQUET_LINK_SHARED "Link to the Parquet shared
library" ON)
# This ensures that things like -std=gnu++... get passed correctly
if(NOT DEFINED CMAKE_CXX_STANDARD)
- set(CMAKE_CXX_STANDARD 17)
+ set(CMAKE_CXX_STANDARD 20)
endif()
-# We require a C++17 compliant compiler
+# We require a C++20 compliant compiler
set(CMAKE_CXX_STANDARD_REQUIRED ON)
# Look for installed packages the system
diff --git a/cpp/examples/tutorial_examples/CMakeLists.txt
b/cpp/examples/tutorial_examples/CMakeLists.txt
index 1466bce48a..d236b7e0a9 100644
--- a/cpp/examples/tutorial_examples/CMakeLists.txt
+++ b/cpp/examples/tutorial_examples/CMakeLists.txt
@@ -21,7 +21,7 @@ project(ArrowTutorialExamples)
find_package(ArrowDataset)
-set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror -Wall -Wextra")
set(CMAKE_BUILD_TYPE Release)
diff --git a/cpp/meson.build b/cpp/meson.build
index f3e0181b68..16bb844d08 100644
--- a/cpp/meson.build
+++ b/cpp/meson.build
@@ -22,7 +22,7 @@ project(
version: '23.0.0-SNAPSHOT',
license: 'Apache-2.0',
meson_version: '>=1.3.0',
- default_options: ['c_std=c11', 'warning_level=2', 'cpp_std=c++17'],
+ default_options: ['c_std=c11', 'warning_level=2', 'cpp_std=c++20'],
)
project_args = [
diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index deb6db3cdd..a46db60321 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -202,7 +202,7 @@ function(arrow_add_object_library PREFIX)
add_library(${prefix}_shared OBJECT ${SOURCES})
set_target_properties(${prefix}_shared PROPERTIES
POSITION_INDEPENDENT_CODE ON)
target_compile_definitions(${prefix}_shared PRIVATE ARROW_EXPORTING)
- target_compile_features(${prefix}_shared PRIVATE cxx_std_17)
+ target_compile_features(${prefix}_shared PRIVATE cxx_std_20)
set(${PREFIX}_TARGET_SHARED
${prefix}_shared
PARENT_SCOPE)
@@ -212,7 +212,7 @@ function(arrow_add_object_library PREFIX)
add_library(${prefix}_static OBJECT ${SOURCES})
set_target_properties(${prefix}_static PROPERTIES
POSITION_INDEPENDENT_CODE ON)
target_compile_definitions(${prefix}_static PRIVATE ARROW_STATIC)
- target_compile_features(${prefix}_static PRIVATE cxx_std_17)
+ target_compile_features(${prefix}_static PRIVATE cxx_std_20)
set(${PREFIX}_TARGET_STATIC
${prefix}_static
PARENT_SCOPE)
@@ -224,7 +224,7 @@ function(arrow_add_object_library PREFIX)
else()
add_library(${prefix} OBJECT ${SOURCES})
set_target_properties(${prefix} PROPERTIES POSITION_INDEPENDENT_CODE ON)
- target_compile_features(${prefix} PRIVATE cxx_std_17)
+ target_compile_features(${prefix} PRIVATE cxx_std_20)
set(${PREFIX}_TARGET_SHARED
${prefix}
PARENT_SCOPE)
@@ -1181,7 +1181,7 @@ endif()
foreach(LIB_TARGET ${ARROW_LIBRARIES})
target_compile_definitions(${LIB_TARGET} PRIVATE ARROW_EXPORTING)
# C++17 is required to compile against Arrow C++ headers and libraries
- target_compile_features(${LIB_TARGET} PUBLIC cxx_std_17)
+ target_compile_features(${LIB_TARGET} PUBLIC cxx_std_20)
endforeach()
if(ARROW_WITH_BACKTRACE)
diff --git a/cpp/src/arrow/array/array_nested.cc
b/cpp/src/arrow/array/array_nested.cc
index b7f1686079..c5a26a475c 100644
--- a/cpp/src/arrow/array/array_nested.cc
+++ b/cpp/src/arrow/array/array_nested.cc
@@ -1004,10 +1004,16 @@ Result<std::shared_ptr<Array>>
FixedSizeListArray::Flatten(
// ----------------------------------------------------------------------
// Struct
+struct StructArray::Impl {
+ mutable ArrayVector boxed_fields_;
+};
+
+StructArray::~StructArray() = default;
StructArray::StructArray(const std::shared_ptr<ArrayData>& data) {
ARROW_CHECK_EQ(data->type->id(), Type::STRUCT);
SetData(data);
- boxed_fields_.resize(data->child_data.size());
+ impl_ = std::make_unique<Impl>();
+ impl_->boxed_fields_.resize(data_->child_data.size());
}
StructArray::StructArray(const std::shared_ptr<DataType>& type, int64_t length,
@@ -1016,10 +1022,12 @@ StructArray::StructArray(const
std::shared_ptr<DataType>& type, int64_t length,
int64_t offset) {
ARROW_CHECK_EQ(type->id(), Type::STRUCT);
SetData(ArrayData::Make(type, length, {std::move(null_bitmap)}, null_count,
offset));
+ data_->child_data.reserve(children.size());
for (const auto& child : children) {
data_->child_data.push_back(child->data());
}
- boxed_fields_.resize(children.size());
+ impl_ = std::make_unique<Impl>();
+ impl_->boxed_fields_.resize(data_->child_data.size());
}
Result<std::shared_ptr<StructArray>> StructArray::Make(
@@ -1073,11 +1081,16 @@ const ArrayVector& StructArray::fields() const {
for (int i = 0; i < num_fields(); ++i) {
(void)field(i);
}
- return boxed_fields_;
+ return impl_->boxed_fields_;
}
std::shared_ptr<Array> StructArray::field(int i) const {
- std::shared_ptr<Array> result = std::atomic_load(&boxed_fields_[i]);
+ // Atomic ops on std::shared_ptr<T> are deprecated in C++20. They should be
+ // replaced with std::atomic<std::shared_ptr<T>> but not all C++ standard
+ // libraries implement it yet. :-/
+ ARROW_SUPPRESS_DEPRECATION_WARNING
+ std::shared_ptr<Array> result = std::atomic_load(&impl_->boxed_fields_[i]);
+ ARROW_UNSUPPRESS_DEPRECATION_WARNING
if (result) {
return result;
}
@@ -1091,7 +1104,11 @@ std::shared_ptr<Array> StructArray::field(int i) const {
// Check if some other thread inserted the array in the meantime and return
// that in that case.
std::shared_ptr<Array> expected = nullptr;
- if (!std::atomic_compare_exchange_strong(&boxed_fields_[i], &expected, result)) {
+ ARROW_SUPPRESS_DEPRECATION_WARNING
+ const bool update_successful =
+ std::atomic_compare_exchange_strong(&impl_->boxed_fields_[i], &expected, result);
+ ARROW_UNSUPPRESS_DEPRECATION_WARNING
+ if (!update_successful) {
result = std::move(expected);
}
return result;
@@ -1183,6 +1200,13 @@ Result<std::shared_ptr<Array>>
StructArray::GetFlattenedField(int index,
// ----------------------------------------------------------------------
// UnionArray
+struct UnionArray::Impl {
+ mutable ArrayVector boxed_fields_;
+};
+
+UnionArray::UnionArray() = default;
+UnionArray::~UnionArray() = default;
+
void UnionArray::SetData(std::shared_ptr<ArrayData> data) {
this->Array::SetData(std::move(data));
@@ -1190,7 +1214,8 @@ void UnionArray::SetData(std::shared_ptr<ArrayData> data)
{
ARROW_CHECK_GE(data_->buffers.size(), 2);
raw_type_codes_ = data->GetValuesSafe<int8_t>(1);
- boxed_fields_.resize(data_->child_data.size());
+ impl_ = std::make_unique<Impl>();
+ impl_->boxed_fields_.resize(data_->child_data.size());
}
void SparseUnionArray::SetData(std::shared_ptr<ArrayData> data) {
@@ -1214,6 +1239,8 @@ void DenseUnionArray::SetData(const
std::shared_ptr<ArrayData>& data) {
raw_value_offsets_ = data->GetValuesSafe<int32_t>(2);
}
+SparseUnionArray::~SparseUnionArray() = default;
+
SparseUnionArray::SparseUnionArray(std::shared_ptr<ArrayData> data) {
SetData(std::move(data));
}
@@ -1267,6 +1294,8 @@ Result<std::shared_ptr<Array>>
SparseUnionArray::GetFlattenedField(
return MakeArray(child_data);
}
+DenseUnionArray::~DenseUnionArray() = default;
+
DenseUnionArray::DenseUnionArray(const std::shared_ptr<ArrayData>& data) {
SetData(data);
}
@@ -1359,23 +1388,34 @@ Result<std::shared_ptr<Array>> SparseUnionArray::Make(
}
std::shared_ptr<Array> UnionArray::field(int i) const {
- if (i < 0 ||
- static_cast<decltype(boxed_fields_)::size_type>(i) >= boxed_fields_.size()) {
+ if (i < 0 || static_cast<size_t>(i) >= impl_->boxed_fields_.size()) {
return nullptr;
}
- std::shared_ptr<Array> result = std::atomic_load(&boxed_fields_[i]);
- if (!result) {
- std::shared_ptr<ArrayData> child_data = data_->child_data[i]->Copy();
- if (mode() == UnionMode::SPARSE) {
- // Sparse union: need to adjust child if union is sliced
- // (for dense unions, the need to lookup through the offsets
- // makes this unnecessary)
- if (data_->offset != 0 || child_data->length > data_->length) {
- child_data = child_data->Slice(data_->offset, data_->length);
- }
+ ARROW_SUPPRESS_DEPRECATION_WARNING
+ std::shared_ptr<Array> result = std::atomic_load(&impl_->boxed_fields_[i]);
+ ARROW_UNSUPPRESS_DEPRECATION_WARNING
+ if (result) {
+ return result;
+ }
+ std::shared_ptr<ArrayData> child_data = data_->child_data[i]->Copy();
+ if (mode() == UnionMode::SPARSE) {
+ // Sparse union: need to adjust child if union is sliced
+ // (for dense unions, the need to lookup through the offsets
+ // makes this unnecessary)
+ if (data_->offset != 0 || child_data->length > data_->length) {
+ child_data = child_data->Slice(data_->offset, data_->length);
}
- result = MakeArray(child_data);
- std::atomic_store(&boxed_fields_[i], result);
+ }
+ result = MakeArray(child_data);
+ // Check if some other thread inserted the array in the meantime and return
+ // that in that case.
+ std::shared_ptr<Array> expected = nullptr;
+ ARROW_SUPPRESS_DEPRECATION_WARNING
+ const bool update_successful =
+ std::atomic_compare_exchange_strong(&impl_->boxed_fields_[i], &expected, result);
+ ARROW_UNSUPPRESS_DEPRECATION_WARNING
+ if (!update_successful) {
+ result = std::move(expected);
}
return result;
}
diff --git a/cpp/src/arrow/array/array_nested.h
b/cpp/src/arrow/array/array_nested.h
index 2591fdaf41..bf84f802b1 100644
--- a/cpp/src/arrow/array/array_nested.h
+++ b/cpp/src/arrow/array/array_nested.h
@@ -661,6 +661,8 @@ class ARROW_EXPORT StructArray : public Array {
public:
using TypeClass = StructType;
+ ~StructArray() override;
+
explicit StructArray(const std::shared_ptr<ArrayData>& data);
StructArray(const std::shared_ptr<DataType>& type, int64_t length,
@@ -720,8 +722,8 @@ class ARROW_EXPORT StructArray : public Array {
private:
// For caching boxed child data
- // XXX This is not handled in a thread-safe manner.
- mutable ArrayVector boxed_fields_;
+ struct ARROW_NO_EXPORT Impl;
+ std::unique_ptr<Impl> impl_;
};
// ----------------------------------------------------------------------
@@ -732,6 +734,8 @@ class ARROW_EXPORT UnionArray : public Array {
public:
using type_code_t = int8_t;
+ ~UnionArray() override;
+
/// Note that this buffer does not account for any slice offset
const std::shared_ptr<Buffer>& type_codes() const { return
data_->buffers[1]; }
@@ -754,13 +758,17 @@ class ARROW_EXPORT UnionArray : public Array {
std::shared_ptr<Array> field(int pos) const;
protected:
+ UnionArray();
+
void SetData(std::shared_ptr<ArrayData> data);
const type_code_t* raw_type_codes_;
const UnionType* union_type_;
+ private:
// For caching boxed child data
- mutable std::vector<std::shared_ptr<Array>> boxed_fields_;
+ struct ARROW_NO_EXPORT Impl;
+ std::unique_ptr<Impl> impl_;
};
/// Concrete Array class for sparse union data
@@ -768,6 +776,8 @@ class ARROW_EXPORT SparseUnionArray : public UnionArray {
public:
using TypeClass = SparseUnionType;
+ ~SparseUnionArray() override;
+
explicit SparseUnionArray(std::shared_ptr<ArrayData> data);
SparseUnionArray(std::shared_ptr<DataType> type, int64_t length, ArrayVector
children,
@@ -821,6 +831,8 @@ class ARROW_EXPORT DenseUnionArray : public UnionArray {
public:
using TypeClass = DenseUnionType;
+ ~DenseUnionArray() override;
+
explicit DenseUnionArray(const std::shared_ptr<ArrayData>& data);
DenseUnionArray(std::shared_ptr<DataType> type, int64_t length, ArrayVector
children,
diff --git a/cpp/src/arrow/compute/kernels/hash_aggregate_numeric.cc
b/cpp/src/arrow/compute/kernels/hash_aggregate_numeric.cc
index acd485f530..aa231430aa 100644
--- a/cpp/src/arrow/compute/kernels/hash_aggregate_numeric.cc
+++ b/cpp/src/arrow/compute/kernels/hash_aggregate_numeric.cc
@@ -866,21 +866,41 @@ using GroupedKurtosisImpl =
ConcreteGroupedStatisticImpl<Type, SkewOptions, StatisticType::Kurtosis>;
template <template <typename Type> typename GroupedImpl>
-Result<HashAggregateKernel> MakeGroupedStatisticKernel(
- const std::shared_ptr<DataType>& type) {
- auto make_kernel = [&](auto&& type) -> Result<HashAggregateKernel> {
- using T = std::decay_t<decltype(type)>;
- // Supporting all number types except float16
- if constexpr (is_integer_type<T>::value ||
- (is_floating_type<T>::value && !is_half_float_type<T>::value) ||
- is_decimal_type<T>::value) {
- return MakeKernel(InputType(T::type_id), HashAggregateInit<GroupedImpl<T>>);
- }
+struct GroupedStatisticKernelFactory {
+ // Supporting all number types except float16
+ template <typename T>
+ enable_if_number<T, Status> Visit(const T& type) {
+ out = MakeKernel(InputType(T::type_id), HashAggregateInit<GroupedImpl<T>>);
+ return Status::OK();
+ }
+
+ template <typename T>
+ enable_if_decimal<T, Status> Visit(const T& type) {
+ out = MakeKernel(InputType(T::type_id), HashAggregateInit<GroupedImpl<T>>);
+ return Status::OK();
+ }
+
+ Status Visit(const HalfFloatType& type) {
return Status::NotImplemented("Computing higher-order statistic of data of type ",
type);
- };
+ }
+
+ Status Visit(const DataType& type) {
+ return Status::NotImplemented("Computing higher-order statistic of data of type ",
+ type);
+ }
+
+ HashAggregateKernel out;
+};
- return VisitType(*type, make_kernel);
+template <template <typename Type> typename GroupedImpl>
+Result<HashAggregateKernel> MakeGroupedStatisticKernel(
+ const std::shared_ptr<DataType>& type) {
+ // Using a distinct visitor class because a generic lambda would hit a MSVC
+ // internal compiler error.
+ GroupedStatisticKernelFactory<GroupedImpl> visitor;
+ RETURN_NOT_OK(VisitTypeInline(*type, &visitor));
+ return std::move(visitor.out);
}
Status AddHashAggregateStatisticKernels(HashAggregateFunction* func,
diff --git a/cpp/src/arrow/filesystem/gcsfs.cc
b/cpp/src/arrow/filesystem/gcsfs.cc
index 82d8a2a13f..ffeba9eadc 100644
--- a/cpp/src/arrow/filesystem/gcsfs.cc
+++ b/cpp/src/arrow/filesystem/gcsfs.cc
@@ -17,10 +17,11 @@
#include "arrow/filesystem/gcsfs.h"
-#include <google/cloud/storage/client.h>
#include <algorithm>
#include <chrono>
+#include <google/cloud/storage/client.h>
+
#include "arrow/buffer.h"
#include "arrow/filesystem/gcsfs_internal.h"
#include "arrow/filesystem/path_util.h"
diff --git a/cpp/src/arrow/filesystem/gcsfs_internal.cc
b/cpp/src/arrow/filesystem/gcsfs_internal.cc
index 721f4da88a..038fe3097b 100644
--- a/cpp/src/arrow/filesystem/gcsfs_internal.cc
+++ b/cpp/src/arrow/filesystem/gcsfs_internal.cc
@@ -18,14 +18,13 @@
#include "arrow/filesystem/gcsfs_internal.h"
#include "arrow/filesystem/gcsfs.h"
-#include <absl/time/time.h> // NOLINT
-#include <google/cloud/storage/client.h>
-
#include <cerrno>
#include <sstream>
#include <unordered_map>
#include <vector>
+#include <absl/time/time.h> // NOLINT
+
#include "arrow/filesystem/path_util.h"
#include "arrow/util/io_util.h"
#include "arrow/util/key_value_metadata.h"
diff --git a/cpp/src/arrow/filesystem/gcsfs_internal.h
b/cpp/src/arrow/filesystem/gcsfs_internal.h
index e06b568189..c1fd7891c3 100644
--- a/cpp/src/arrow/filesystem/gcsfs_internal.h
+++ b/cpp/src/arrow/filesystem/gcsfs_internal.h
@@ -17,17 +17,18 @@
#pragma once
+#include <memory>
+#include <string>
+#include <vector>
+
#include <google/cloud/credentials.h>
#include <google/cloud/options.h>
#include <google/cloud/status.h>
+#include <google/cloud/storage/client.h>
#include <google/cloud/storage/object_metadata.h>
#include <google/cloud/storage/well_known_headers.h>
#include <google/cloud/storage/well_known_parameters.h>
-#include <memory>
-#include <string>
-#include <vector>
-
#include "arrow/filesystem/filesystem.h"
namespace arrow {
diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc
index 0477417150..c6b821f5de 100644
--- a/cpp/src/arrow/filesystem/s3fs.cc
+++ b/cpp/src/arrow/filesystem/s3fs.cc
@@ -1188,26 +1188,21 @@ class RegionResolver {
}
static Result<std::shared_ptr<RegionResolver>> DefaultInstance() {
- auto resolver = std::atomic_load(&instance_);
- if (resolver) {
- return resolver;
+ std::unique_lock lock(instance_mutex_);
+ if (instance_) {
+ return instance_;
}
auto maybe_resolver = Make(S3Options::Anonymous());
if (!maybe_resolver.ok()) {
return maybe_resolver;
}
- // Make sure to always return the same instance even if several threads
- // call DefaultInstance at once.
- std::shared_ptr<RegionResolver> existing;
- if (std::atomic_compare_exchange_strong(&instance_, &existing, *maybe_resolver)) {
- return *maybe_resolver;
- } else {
- return existing;
- }
+ instance_ = *maybe_resolver;
+ return maybe_resolver;
}
static void ResetDefaultInstance() {
- std::atomic_store(&instance_, std::shared_ptr<RegionResolver>());
+ std::unique_lock lock(instance_mutex_);
+ instance_.reset();
}
Result<std::string> ResolveRegion(const std::string& bucket) {
@@ -1241,7 +1236,8 @@ class RegionResolver {
return builder_.BuildClient().Value(&holder_);
}
- static std::shared_ptr<RegionResolver> instance_;
+ static inline std::mutex instance_mutex_;
+ static inline std::shared_ptr<RegionResolver> instance_;
ClientBuilder builder_;
std::shared_ptr<S3ClientHolder> holder_;
@@ -1252,8 +1248,6 @@ class RegionResolver {
std::unordered_map<std::string, std::string> cache_;
};
-std::shared_ptr<RegionResolver> RegionResolver::instance_;
-
// -----------------------------------------------------------------------
// S3 file stream implementations
diff --git a/cpp/src/arrow/flight/server_tracing_middleware.cc
b/cpp/src/arrow/flight/server_tracing_middleware.cc
index f300116200..0cdf59308d 100644
--- a/cpp/src/arrow/flight/server_tracing_middleware.cc
+++ b/cpp/src/arrow/flight/server_tracing_middleware.cc
@@ -31,7 +31,6 @@
# include <opentelemetry/context/propagation/text_map_propagator.h>
# include <opentelemetry/trace/context.h>
# include <opentelemetry/trace/propagation/http_trace_context.h>
-# include <opentelemetry/trace/semantic_conventions.h>
#endif
namespace arrow {
diff --git a/cpp/src/arrow/flight/transport/grpc/customize_grpc.h
b/cpp/src/arrow/flight/transport/grpc/customize_grpc.h
index 153aa5ae1d..7836f7c161 100644
--- a/cpp/src/arrow/flight/transport/grpc/customize_grpc.h
+++ b/cpp/src/arrow/flight/transport/grpc/customize_grpc.h
@@ -20,6 +20,26 @@
#include <limits>
#include <memory>
+// HACK: Workaround absl::Mutex ABI incompatibility by making sure the
+// non-debug version of Abseil is included
+// (https://github.com/conda-forge/abseil-cpp-feedstock/issues/104,
+// https://github.com/abseil/abseil-cpp/issues/1624)
+
+#if __has_include(<absl/synchronization/mutex.h>)
+
+# ifndef NDEBUG
+# define ARROW_NO_NDEBUG
+# define NDEBUG
+# endif
+
+# include <absl/synchronization/mutex.h>
+
+# ifdef ARROW_NO_NDEBUG
+# undef NDEBUG
+# endif
+
+#endif
+
#include "arrow/flight/platform.h"
#include "arrow/flight/type_fwd.h"
#include "arrow/flight/visibility.h"
diff --git a/cpp/src/arrow/flight/transport/grpc/grpc_client.cc
b/cpp/src/arrow/flight/transport/grpc/grpc_client.cc
index 0f489fd165..6cf3242b07 100644
--- a/cpp/src/arrow/flight/transport/grpc/grpc_client.cc
+++ b/cpp/src/arrow/flight/transport/grpc/grpc_client.cc
@@ -28,6 +28,8 @@
#include <unordered_map>
#include <utility>
+#include "arrow/flight/transport/grpc/customize_grpc.h"
+
#include <grpcpp/grpcpp.h>
#include <grpcpp/support/client_callback.h>
#if defined(GRPC_NAMESPACE_FOR_TLS_CREDENTIALS_OPTIONS)
diff --git a/cpp/src/arrow/flight/transport/grpc/grpc_server.cc
b/cpp/src/arrow/flight/transport/grpc/grpc_server.cc
index 28fc736aa0..3cc854b58f 100644
--- a/cpp/src/arrow/flight/transport/grpc/grpc_server.cc
+++ b/cpp/src/arrow/flight/transport/grpc/grpc_server.cc
@@ -25,6 +25,8 @@
#include <unordered_map>
#include <utility>
+#include "arrow/flight/transport/grpc/customize_grpc.h"
+
#include <grpcpp/grpcpp.h>
#include "arrow/buffer.h"
diff --git a/cpp/src/arrow/record_batch.cc b/cpp/src/arrow/record_batch.cc
index 65c5c56c4a..1162b4c3bb 100644
--- a/cpp/src/arrow/record_batch.cc
+++ b/cpp/src/arrow/record_batch.cc
@@ -18,10 +18,10 @@
#include "arrow/record_batch.h"
#include <algorithm>
-#include <atomic>
#include <cmath>
#include <cstdlib>
#include <memory>
+#include <mutex>
#include <sstream>
#include <string>
#include <utility>
@@ -96,25 +96,21 @@ class SimpleRecordBatch : public RecordBatch {
}
const std::vector<std::shared_ptr<Array>>& columns() const override {
+ std::lock_guard lock(mutex_);
for (int i = 0; i < num_columns(); ++i) {
- // Force all columns to be boxed
- column(i);
+ if (!boxed_columns_[i]) {
+ boxed_columns_[i] = MakeArray(columns_[i]);
+ }
}
return boxed_columns_;
}
std::shared_ptr<Array> column(int i) const override {
- std::shared_ptr<Array> result = std::atomic_load(&boxed_columns_[i]);
- if (!result) {
- auto new_array = MakeArray(columns_[i]);
- // Be careful not to overwrite existing entry if another thread has been calling
- // `column(i)` at the same time, since the `boxed_columns_` contents are exposed
- // by `columns()` (see GH-45371).
- if (std::atomic_compare_exchange_strong(&boxed_columns_[i], &result, new_array)) {
- return new_array;
- }
+ std::lock_guard lock(mutex_);
+ if (!boxed_columns_[i]) {
+ boxed_columns_[i] = MakeArray(columns_[i]);
}
- return result;
+ return boxed_columns_[i];
}
std::shared_ptr<ArrayData> column_data(int i) const override { return
columns_[i]; }
@@ -211,6 +207,7 @@ class SimpleRecordBatch : public RecordBatch {
std::vector<std::shared_ptr<ArrayData>> columns_;
// Caching boxed array data
+ mutable std::mutex mutex_;
mutable std::vector<std::shared_ptr<Array>> boxed_columns_;
// the type of device that the buffers for columns are allocated on.
diff --git a/cpp/src/arrow/util/range.h b/cpp/src/arrow/util/range.h
index 55155b7e34..449a1fbd80 100644
--- a/cpp/src/arrow/util/range.h
+++ b/cpp/src/arrow/util/range.h
@@ -239,14 +239,15 @@ struct Zip<std::tuple<Ranges...>,
std::index_sequence<I...>> {
/// \endcode
template <typename I = size_t>
constexpr auto Enumerate = [] {
+ using Int = I;
struct {
struct sentinel {};
constexpr sentinel end() const { return {}; }
struct iterator {
- I value{0};
+ Int value{0};
- constexpr I operator*() { return value; }
+ constexpr Int operator*() { return value; }
constexpr iterator& operator++() {
++value;
diff --git a/cpp/src/arrow/util/string.h b/cpp/src/arrow/util/string.h
index f2081d0937..d39b7a295e 100644
--- a/cpp/src/arrow/util/string.h
+++ b/cpp/src/arrow/util/string.h
@@ -54,14 +54,12 @@ namespace internal {
/// Like std::string_view::starts_with in C++20
inline bool StartsWith(std::string_view s, std::string_view prefix) {
- return s.length() >= prefix.length() &&
- (s.empty() || s.substr(0, prefix.length()) == prefix);
+ return s.starts_with(prefix);
}
/// Like std::string_view::ends_with in C++20
inline bool EndsWith(std::string_view s, std::string_view suffix) {
- return s.length() >= suffix.length() &&
- (s.empty() || s.substr(s.length() - suffix.length()) == suffix);
+ return s.ends_with(suffix);
}
/// \brief Split a string with a delimiter
diff --git a/cpp/src/arrow/util/vector.h b/cpp/src/arrow/util/vector.h
index e77d713a44..809497b9ae 100644
--- a/cpp/src/arrow/util/vector.h
+++ b/cpp/src/arrow/util/vector.h
@@ -145,12 +145,11 @@ template <typename T>
Result<std::vector<T>> UnwrapOrRaise(std::vector<Result<T>>&& results) {
std::vector<T> out;
out.reserve(results.size());
- auto end = std::make_move_iterator(results.end());
- for (auto it = std::make_move_iterator(results.begin()); it != end; it++) {
- if (!it->ok()) {
- return it->status();
+ for (auto&& result : results) {
+ if (!result.ok()) {
+ return result.status();
}
- out.push_back(it->MoveValueUnsafe());
+ out.push_back(result.MoveValueUnsafe());
}
return out;
}
diff --git a/cpp/src/gandiva/engine.cc b/cpp/src/gandiva/engine.cc
index a55421b1b4..64ed433a68 100644
--- a/cpp/src/gandiva/engine.cc
+++ b/cpp/src/gandiva/engine.cc
@@ -185,7 +185,7 @@ void AddProcessSymbol(llvm::orc::LLJIT& lljit) {
llvm::cantFail(llvm::orc::DynamicLibrarySearchGenerator::GetForCurrentProcess(
lljit.getDataLayout().getGlobalPrefix())));
// the `atexit` symbol cannot be found for ASAN
-#ifdef ADDRESS_SANITIZER
+#if defined(ADDRESS_SANITIZER) && LLVM_VERSION_MAJOR < 18
if (!lljit.lookup("atexit")) {
AddAbsoluteSymbol(lljit, "atexit", reinterpret_cast<void*>(atexit));
}
diff --git a/cpp/src/gandiva/precompiled/decimal_ops.cc
b/cpp/src/gandiva/precompiled/decimal_ops.cc
index 2908d2fbc6..68949680b7 100644
--- a/cpp/src/gandiva/precompiled/decimal_ops.cc
+++ b/cpp/src/gandiva/precompiled/decimal_ops.cc
@@ -527,7 +527,8 @@ double ToDouble(const BasicDecimalScalar128& in, bool*
overflow) {
BasicDecimal128 FromInt64(int64_t in, int32_t precision, int32_t scale, bool*
overflow) {
// check if multiplying by scale will cause an overflow.
- DECIMAL_OVERFLOW_IF(std::abs(in) > GetMaxValue(precision - scale), overflow);
+ const auto max_val = GetMaxValue(precision - scale);
+ DECIMAL_OVERFLOW_IF(in > max_val || in < -max_val, overflow);
return in * BasicDecimal128::GetScaleMultiplier(scale);
}
diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt
index f536dab0ac..51edbe2b99 100644
--- a/cpp/thirdparty/versions.txt
+++ b/cpp/thirdparty/versions.txt
@@ -114,8 +114,8 @@ ARROW_THRIFT_BUILD_VERSION=0.22.0
ARROW_THRIFT_BUILD_SHA256_CHECKSUM=794a0e455787960d9f27ab92c38e34da27e8deeda7a5db0e59dc64a00df8a1e5
ARROW_UTF8PROC_BUILD_VERSION=v2.10.0
ARROW_UTF8PROC_BUILD_SHA256_CHECKSUM=6f4f1b639daa6dca9f80bc5db1233e9cbaa31a67790887106160b33ef743f136
-ARROW_XSIMD_BUILD_VERSION=13.0.0
-ARROW_XSIMD_BUILD_SHA256_CHECKSUM=8bdbbad0c3e7afa38d88d0d484d70a1671a1d8aefff03f4223ab2eb6a41110a3
+ARROW_XSIMD_BUILD_VERSION=14.0.0
+ARROW_XSIMD_BUILD_SHA256_CHECKSUM=17de0236954955c10c09d6938d4c5f3a3b92d31be5dadd1d5d09fc1b15490dce
ARROW_ZLIB_BUILD_VERSION=1.3.1
ARROW_ZLIB_BUILD_SHA256_CHECKSUM=9a93b2b7dfdac77ceba5a558a580e74667dd6fede4585b91eefb60f03b72df23
ARROW_ZSTD_BUILD_VERSION=1.5.7
diff --git a/dev/release/setup-rhel-rebuilds.sh
b/dev/release/setup-rhel-rebuilds.sh
index d6b335433a..3c8c9280ef 100755
--- a/dev/release/setup-rhel-rebuilds.sh
+++ b/dev/release/setup-rhel-rebuilds.sh
@@ -16,20 +16,14 @@
# specific language governing permissions and limitations
# under the License.
-
# A script to install dependencies required for release
-# verification Red Hat Enterprise Linux 8 clones in particular
-# on AlmaLinux 8 and Rocky Linux 8
+# verification on Red Hat Enterprise Linux 10 clones in particular
+# on AlmaLinux 10
set -exu
dnf -y install 'dnf-command(config-manager)'
-dnf config-manager --set-enabled powertools
dnf -y update
-dnf -y module disable nodejs
-dnf -y module enable nodejs:18
-dnf -y module disable ruby
-dnf -y module enable ruby:2.7
dnf -y groupinstall "Development Tools"
dnf -y install \
cmake \
@@ -40,15 +34,12 @@ dnf -y install \
llvm-toolset \
ncurses-devel \
ninja-build \
- nodejs \
openssl-devel \
- python3.12-devel \
+ python3-devel \
ruby-devel \
sqlite-devel \
vala-devel \
wget \
which
-npm install -g yarn
-
python3 -m ensurepip --upgrade
diff --git a/dev/release/verify-apt.sh b/dev/release/verify-apt.sh
index bfb1019b8f..73a0b15607 100755
--- a/dev/release/verify-apt.sh
+++ b/dev/release/verify-apt.sh
@@ -165,7 +165,7 @@ if [ "${cmake_version_major}" -gt "3" ] || \
cmake .
make -j$(nproc)
./arrow-example
- c++ -o arrow-example example.cc $(pkg-config --cflags --libs arrow) -std=c++17
+ c++ -o arrow-example example.cc $(pkg-config --cflags --libs arrow) -std=c++20
./arrow-example
popd
fi
diff --git a/dev/release/verify-yum.sh b/dev/release/verify-yum.sh
index c3b896ef19..d642f80629 100755
--- a/dev/release/verify-yum.sh
+++ b/dev/release/verify-yum.sh
@@ -194,7 +194,7 @@ if [ "${cmake_version_major}" -gt "3" ] || \
${cmake_command} .
make -j$(nproc)
./arrow-example
- c++ -o arrow-example example.cc $(pkg-config --cflags --libs arrow) -std=c++17
+ c++ -o arrow-example example.cc $(pkg-config --cflags --libs arrow) -std=c++2a
./arrow-example
popd
fi
diff --git a/dev/tasks/homebrew-formulae/apache-arrow.rb
b/dev/tasks/homebrew-formulae/apache-arrow.rb
index 09971286f3..f93a56f7f2 100644
--- a/dev/tasks/homebrew-formulae/apache-arrow.rb
+++ b/dev/tasks/homebrew-formulae/apache-arrow.rb
@@ -121,7 +121,7 @@ class ApacheArrow < Formula
return 0;
}
CPP
- system ENV.cxx, "test.cpp", "-std=c++17", "-I#{include}", "-L#{lib}", "-larrow", "-o", "test"
+ system ENV.cxx, "test.cpp", "-std=c++20", "-I#{include}", "-L#{lib}", "-larrow", "-o", "test"
system "./test"
end
end
diff --git a/dev/tasks/linux-packages/apache-arrow/yum/almalinux-10/Dockerfile
b/dev/tasks/linux-packages/apache-arrow/yum/almalinux-10/Dockerfile
index 3a54e56da0..78134ab816 100644
--- a/dev/tasks/linux-packages/apache-arrow/yum/almalinux-10/Dockerfile
+++ b/dev/tasks/linux-packages/apache-arrow/yum/almalinux-10/Dockerfile
@@ -47,7 +47,6 @@ RUN \
llvm-devel \
llvm-static \
lz4-devel \
- make \
ncurses-devel \
ninja-build \
openssl-devel \
diff --git a/dev/tasks/linux-packages/apache-arrow/yum/almalinux-8/Dockerfile
b/dev/tasks/linux-packages/apache-arrow/yum/almalinux-8/Dockerfile
index 324975a600..2a389a82da 100644
--- a/dev/tasks/linux-packages/apache-arrow/yum/almalinux-8/Dockerfile
+++ b/dev/tasks/linux-packages/apache-arrow/yum/almalinux-8/Dockerfile
@@ -18,12 +18,17 @@
ARG FROM=almalinux:8
FROM ${FROM}
+ENV SCL=gcc-toolset-14
+
ARG DEBUG
RUN \
quiet=$([ "${DEBUG}" = "yes" ] || echo "--quiet") && \
dnf install -y ${quiet} epel-release && \
dnf install --enablerepo=powertools -y ${quiet} \
+ ${SCL}-annobin-plugin-gcc \
+ ${SCL}-gcc-c++ \
+ ${SCL}-libatomic-devel \
bison \
boost-devel \
brotli-devel \
@@ -34,7 +39,6 @@ RUN \
cmake \
curl-devel \
flex \
- gcc-c++ \
gflags-devel \
git \
glog-devel \
diff --git a/dev/tasks/linux-packages/apache-arrow/yum/almalinux-9/Dockerfile
b/dev/tasks/linux-packages/apache-arrow/yum/almalinux-9/Dockerfile
index 45b86dcdfa..065ddcc82c 100644
--- a/dev/tasks/linux-packages/apache-arrow/yum/almalinux-9/Dockerfile
+++ b/dev/tasks/linux-packages/apache-arrow/yum/almalinux-9/Dockerfile
@@ -37,7 +37,6 @@ RUN \
cmake \
curl-devel \
flex \
- gcc-c++ \
gflags-devel \
git \
gobject-introspection-devel \
diff --git a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in
b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in
index 9536ce55cc..8cc272c35a 100644
--- a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in
+++ b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in
@@ -179,12 +179,33 @@ cd c_glib
%if %{enable_glib_doc}
pip3 install gi-docgen
%endif
+if [ -n "${X_SCLS:-}" ]; then
+ # gcc-toolset-X may need libstdc++_nonshared.a when we build C++
+ # programs. In general, it's automatically done by GNU ld script.
+ # For example, gcc-toolset-12 has it at
+ # /opt/rh/gcc-toolset-12/root/usr/lib/gcc/x86_64-redhat-linux/12/libstdc++.so
+ # and its content is the following:
+ #
+ # /* GNU ld script
+ # Use the shared library, but some functions are only in
+ # the static library, so try that secondarily. */
+ # OUTPUT_FORMAT(elf64-x86-64)
+ # INPUT ( /usr/lib64/libstdc++.so.6 -lstdc++_nonshared )
+ #
+ # If we use --as-needed linker option, it may not be used. If it's
+ # not used, "undefined reference to symbol
+ # '_ZTISt20bad_array_new_length@@CXXABI_1.3.8'" error will happen.
+ use_asneeded=false
+else
+ use_asneeded=true
+fi
meson setup build \
--default-library=both \
--libdir=%{_libdir} \
--prefix=%{_prefix} \
-Darrow_cpp_build_dir=../cpp/%{arrow_cmake_builddir} \
-Darrow_cpp_build_type=$cpp_build_type \
+ -Db_asneeded=${use_asneeded} \
%if %{enable_glib_doc}
-Ddoc=true \
%endif
diff --git a/dev/tasks/linux-packages/yum/build.sh
b/dev/tasks/linux-packages/yum/build.sh
index d4631f92a1..16614967ee 100755
--- a/dev/tasks/linux-packages/yum/build.sh
+++ b/dev/tasks/linux-packages/yum/build.sh
@@ -147,6 +147,17 @@ WHICH_STRIP
run cat <<USE_SCL_STRIP >> ~/.rpmmacros
%__strip $(run scl enable ${SCL} ./which-strip.sh)
USE_SCL_STRIP
+
+ case "${SCL}" in
+ gcc-toolset-*)
+ gcc_major=${SCL#gcc-toolset-}
+ # Apply workaround from https://docs.redhat.com/en/documentation/red_hat_enterprise_linux/9/html/developing_c_and_cpp_applications_in_rhel_9/assembly_additional-toolsets-for-development-rhel-9_developing-applications#ref_specifics-of-annobin-in-gcc-toolset-12_annobin
+ pushd /opt/rh/${SCL}/root/usr/lib/gcc/$(arch)-redhat-linux/${gcc_major}/plugin/
+ ln -s annobin.so gcc-annobin.so
+ popd
+ ;;
+ esac
+
if [ "${DEBUG:-no}" = "yes" ]; then
run scl enable ${SCL} ./build.sh
else
diff --git a/dev/tasks/r/github.packages.yml b/dev/tasks/r/github.packages.yml
index f57901371b..cedb567f2c 100644
--- a/dev/tasks/r/github.packages.yml
+++ b/dev/tasks/r/github.packages.yml
@@ -176,6 +176,8 @@ jobs:
shell: bash
env:
ARROW_HOME: "arrow"
+ MINGW_ARCH: "ucrt64"
+ RTOOLS_VERSION: 40
{{ macros.github_set_sccache_envvars()|indent(8) }}
run: arrow/ci/scripts/r_windows_build.sh
- name: Create Checksum
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index 10934fd9a8..065556c805 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -46,13 +46,9 @@ groups:
test:
- test-*
- # Can be removed after we improved C++20 support
- - ~test-debian-experimental-cpp-gcc-15
cpp:
- test-*cpp*
- # Can be removed after we improved C++20 support
- - ~test-debian-experimental-cpp-gcc-15
- example-*cpp*
c-glib:
@@ -117,8 +113,6 @@ groups:
nightly-tests:
- test-*
- # Can be removed after we improved C++20 support
- - ~test-debian-experimental-cpp-gcc-15
- example-*
nightly-packaging:
@@ -314,7 +308,7 @@ tasks:
######################## Linux verification #################################
{% for distribution, version in [("conda", "latest"),
- ("almalinux", "8"),
+ ("almalinux", "10"),
("ubuntu", "22.04"),
("ubuntu", "24.04")] %}
{% for target in ["cpp",
@@ -445,8 +439,6 @@ tasks:
template: docker-tests/github.linux.yml
params:
env:
- CLANG_TOOLS: 15
- LLVM: 15
UBUNTU: 24.04
image: ubuntu-cpp
@@ -455,9 +447,7 @@ tasks:
template: docker-tests/github.linux.yml
params:
env:
- CLANG_TOOLS: 15
GCC: 14
- LLVM: 15
UBUNTU: 24.04
# rapidjson 1.1.0 has an error caught by gcc 14.
# https://github.com/Tencent/rapidjson/issues/718
@@ -493,7 +483,7 @@ tasks:
ARCH: "amd64"
DEBIAN: "experimental"
GCC: "15"
- flags: "-e CMAKE_CXX_STANDARD=20"
+ LLVM: "20"
image: debian-cpp
test-fedora-42-cpp:
@@ -748,8 +738,7 @@ tasks:
{% for r_org, r_image, r_tag in [("rhub", "ubuntu-release", "latest"),
("rocker", "r-ver", "latest"),
- ("rstudio", "r-base", "4.2-focal"),
- ("rstudio", "r-base", "4.1-focal")] %}
+ ("rstudio", "r-base", "4.2-jammy")] %}
test-r-{{ r_org }}-{{ r_image }}-{{ r_tag }}:
ci: azure
template: r/azure.linux.yml
diff --git a/dev/tasks/verify-rc/github.macos.yml
b/dev/tasks/verify-rc/github.macos.yml
index 315083543c..d20d78307b 100644
--- a/dev/tasks/verify-rc/github.macos.yml
+++ b/dev/tasks/verify-rc/github.macos.yml
@@ -22,7 +22,7 @@
{% set use_conda = use_conda|default(False) %}
# env: is generated by macros.github_header()
# Current oldest supported version according to https://endoflife.date/macos
- MACOSX_DEPLOYMENT_TARGET: "12.0"
+ MACOSX_DEPLOYMENT_TARGET: "14.0"
jobs:
verify:
@@ -64,19 +64,6 @@ jobs:
brew install gzip
{% endif %}
- - uses: actions/setup-java@v2
- with:
- distribution: 'temurin'
- java-version: '11'
-
- - uses: actions/setup-dotnet@v4
- with:
- dotnet-version: '8.0.x'
-
- - uses: actions/setup-node@v4
- with:
- node-version: '20'
-
- name: Run verification
shell: bash
env:
diff --git a/docs/source/cpp/conventions.rst b/docs/source/cpp/conventions.rst
index ba61d747db..8ea625c0b8 100644
--- a/docs/source/cpp/conventions.rst
+++ b/docs/source/cpp/conventions.rst
@@ -29,7 +29,7 @@ there may be exceptions.
Language version
----------------
-Starting with version 10.0, Arrow C++ is C++17-compatible.
+Starting with version 23.0, Arrow C++ requires C++20 or later.
Namespacing
-----------
diff --git a/docs/source/developers/cpp/building.rst
b/docs/source/developers/cpp/building.rst
index 0c37ee9450..33530779f5 100644
--- a/docs/source/developers/cpp/building.rst
+++ b/docs/source/developers/cpp/building.rst
@@ -39,8 +39,8 @@ out-of-source. If you are not familiar with this terminology:
Building requires:
-* A C++17-enabled compiler. On Linux, gcc 9 and higher should be
- sufficient. For Windows, at least Visual Studio VS2017 is required.
+* A C++20-enabled compiler. On Linux, gcc 12 and higher should be
+ sufficient.
* CMake 3.25 or higher
* On Linux and macOS, either ``make`` or ``ninja`` build utilities
* At least 1GB of RAM for a minimal build, 4GB for a minimal
diff --git a/docs/source/python/integration/extending.rst
b/docs/source/python/integration/extending.rst
index 30750c21bc..dbcdc3271a 100644
--- a/docs/source/python/integration/extending.rst
+++ b/docs/source/python/integration/extending.rst
@@ -441,7 +441,7 @@ To build this module, you will need a slightly customized
``setup.py`` file
ext.library_dirs.extend(pa.get_library_dirs())
if os.name == 'posix':
- ext.extra_compile_args.append('-std=c++17')
+ ext.extra_compile_args.append('-std=c++20')
setup(ext_modules=ext_modules)
diff --git a/matlab/CMakeLists.txt b/matlab/CMakeLists.txt
index 1fef5d9408..9fa737f687 100644
--- a/matlab/CMakeLists.txt
+++ b/matlab/CMakeLists.txt
@@ -98,7 +98,7 @@ function(build_arrow)
endfunction()
-set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD 20)
set(MLARROW_VERSION "23.0.0-SNAPSHOT")
string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" MLARROW_BASE_VERSION
"${MLARROW_VERSION}")
diff --git a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake
b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake
index 8cc7b78f7c..cc021b3400 100644
--- a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake
+++ b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake
@@ -131,7 +131,7 @@ libmexclass_client_add_proxy_library(
LINK_LIBRARIES arrow_shared
)
-# Use C++17
+# Use C++20
-target_compile_features(${MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_LIBRARY_NAME} PRIVATE cxx_std_17)
+target_compile_features(${MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_LIBRARY_NAME} PRIVATE cxx_std_20)
target_compile_definitions(${MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_LIBRARY_NAME}
PRIVATE ARROW_MATLAB_EXPORTING)
# When building Arrow from source, Arrow must be built before building the
client Proxy library.
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index d80f21333e..b1c8e32494 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -230,7 +230,7 @@ endif()
# For any C code, use the same flags.
set(CMAKE_C_FLAGS "${CMAKE_CXX_FLAGS}")
-# Add C++-only flags, like -std=c++17
+# Add C++-only flags, like -std=c++20
set(CMAKE_CXX_FLAGS "${CXX_ONLY_FLAGS} ${CMAKE_CXX_FLAGS}")
message(STATUS "CMAKE_C_FLAGS: ${CMAKE_C_FLAGS}")
diff --git a/python/pyarrow/tests/test_cython.py
b/python/pyarrow/tests/test_cython.py
index e0116a4bb7..a142e66db5 100644
--- a/python/pyarrow/tests/test_cython.py
+++ b/python/pyarrow/tests/test_cython.py
@@ -28,9 +28,9 @@ import pyarrow.tests.util as test_util
here = os.path.dirname(os.path.abspath(__file__))
test_ld_path = os.environ.get('PYARROW_TEST_LD_PATH', '')
if os.name == 'posix':
- compiler_opts = ['-std=c++17']
+ compiler_opts = ['-std=c++20']
elif os.name == 'nt':
- compiler_opts = ['-D_ENABLE_EXTENDED_ALIGNED_STORAGE', '/std:c++17']
+ compiler_opts = ['-D_ENABLE_EXTENDED_ALIGNED_STORAGE', '/std:c++20']
else:
compiler_opts = []