This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 07ac9fd86c ARROW-16717: [C++] Add support for system jemalloc (#13373)
07ac9fd86c is described below
commit 07ac9fd86c6225f493943e4ab0ff35b0fdbfb2ae
Author: Sutou Kouhei <[email protected]>
AuthorDate: Thu Jun 16 18:04:19 2022 +0900
ARROW-16717: [C++] Add support for system jemalloc (#13373)
Lead-authored-by: Sutou Kouhei <[email protected]>
Co-authored-by: Sutou Kouhei <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
---
ci/conan/all/conanfile.py | 1 +
ci/scripts/conan_build.sh | 3 +
cpp/CMakeLists.txt | 6 +-
cpp/cmake_modules/DefineOptions.cmake | 4 +
cpp/cmake_modules/Findjemalloc.cmake | 113 ++++++------
cpp/cmake_modules/ThirdpartyToolchain.cmake | 35 +++-
cpp/src/arrow/CMakeLists.txt | 8 +-
cpp/src/arrow/memory_pool.cc | 195 +++++----------------
cpp/src/arrow/memory_pool_internal.h | 56 ++++++
cpp/src/arrow/memory_pool_jemalloc.cc | 156 +++++++++++++++++
cpp/src/arrow/memory_pool_test.cc | 1 +
cpp/src/arrow/util/config.h.cmake | 2 +
.../apache-arrow/apt/debian-bookworm/Dockerfile | 1 +
.../apache-arrow/apt/debian-bullseye/Dockerfile | 1 +
.../apache-arrow/apt/debian-buster/Dockerfile | 1 +
.../apache-arrow/apt/ubuntu-bionic/Dockerfile | 1 +
.../apache-arrow/apt/ubuntu-focal/Dockerfile | 1 +
.../apache-arrow/apt/ubuntu-impish/Dockerfile | 1 +
.../apache-arrow/apt/ubuntu-jammy/Dockerfile | 1 +
.../linux-packages/apache-arrow/debian/control.in | 2 +
.../apache-arrow/debian/libarrow-dev.install | 1 +
dev/tasks/linux-packages/apache-arrow/debian/rules | 3 +-
.../apache-arrow/yum/almalinux-8/Dockerfile | 1 +
.../apache-arrow/yum/almalinux-9/Dockerfile | 1 +
.../apache-arrow/yum/amazon-linux-2/Dockerfile | 1 +
.../linux-packages/apache-arrow/yum/arrow.spec.in | 7 +-
.../apache-arrow/yum/centos-7/Dockerfile | 1 +
.../apache-arrow/yum/centos-8-stream/Dockerfile | 1 +
dev/tasks/tasks.yml | 1 +
29 files changed, 372 insertions(+), 234 deletions(-)
diff --git a/ci/conan/all/conanfile.py b/ci/conan/all/conanfile.py
index e0b325bfda..a0695baa70 100644
--- a/ci/conan/all/conanfile.py
+++ b/ci/conan/all/conanfile.py
@@ -361,6 +361,7 @@ class ArrowConan(ConanFile):
self._cmake.definitions["ARROW_CSV"] = self.options.with_csv
self._cmake.definitions["ARROW_CUDA"] = self.options.with_cuda
self._cmake.definitions["ARROW_JEMALLOC"] = self._with_jemalloc()
+ self._cmake.definitions["jemalloc_SOURCE"] = "SYSTEM"
self._cmake.definitions["ARROW_JSON"] = self.options.with_json
self._cmake.definitions["BOOST_SOURCE"] = "SYSTEM"
diff --git a/ci/scripts/conan_build.sh b/ci/scripts/conan_build.sh
index 412570b5d9..1d597f7ab0 100755
--- a/ci/scripts/conan_build.sh
+++ b/ci/scripts/conan_build.sh
@@ -31,6 +31,9 @@ conan_args=()
if [ -n "${ARROW_CONAN_PARQUET:-}" ]; then
conan_args+=(--options arrow:parquet=${ARROW_CONAN_PARQUET})
fi
+if [ -n "${ARROW_CONAN_WITH_JEMALLOC:-}" ]; then
+ conan_args+=(--options arrow:with_jemalloc=${ARROW_CONAN_WITH_JEMALLOC})
+fi
if [ -n "${ARROW_CONAN_WITH_LZ4:-}" ]; then
conan_args+=(--options arrow:with_lz4=${ARROW_CONAN_WITH_LZ4})
fi
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 5d198ec8f5..f1b41cfacf 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -926,10 +926,8 @@ if(ARROW_BUILD_BENCHMARKS)
endif()
if(ARROW_JEMALLOC)
- add_definitions(-DARROW_JEMALLOC)
- add_definitions(-DARROW_JEMALLOC_INCLUDE_DIR=${JEMALLOC_INCLUDE_DIR})
- list(APPEND ARROW_LINK_LIBS jemalloc::jemalloc)
- list(APPEND ARROW_STATIC_LINK_LIBS jemalloc::jemalloc)
+ list(APPEND ARROW_LINK_LIBS jemalloc)
+ list(APPEND ARROW_STATIC_LINK_LIBS jemalloc)
endif()
if(ARROW_MIMALLOC)
diff --git a/cpp/cmake_modules/DefineOptions.cmake
b/cpp/cmake_modules/DefineOptions.cmake
index a3df2e637d..8dc7d77ed4 100644
--- a/cpp/cmake_modules/DefineOptions.cmake
+++ b/cpp/cmake_modules/DefineOptions.cmake
@@ -344,6 +344,10 @@ if(ARROW_DEFINE_OPTIONS)
define_option(ARROW_GRPC_USE_SHARED "Rely on gRPC shared libraries where
relevant"
${ARROW_DEPENDENCY_USE_SHARED})
+ define_option(ARROW_JEMALLOC_USE_SHARED
+ "Rely on jemalloc shared libraries where relevant"
+ ${ARROW_DEPENDENCY_USE_SHARED})
+
define_option(ARROW_LZ4_USE_SHARED "Rely on lz4 shared libraries where
relevant"
${ARROW_DEPENDENCY_USE_SHARED})
diff --git a/cpp/cmake_modules/Findjemalloc.cmake
b/cpp/cmake_modules/Findjemalloc.cmake
index 98816e9522..db30f71d25 100644
--- a/cpp/cmake_modules/Findjemalloc.cmake
+++ b/cpp/cmake_modules/Findjemalloc.cmake
@@ -1,4 +1,3 @@
-#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@@ -17,78 +16,68 @@
#
# find_package(jemalloc)
#
-# Variables used by this module, they can change the default behaviour and need
-# to be set before calling find_package:
-#
-# JEMALLOC_HOME -
-# When set, this path is inspected instead of standard library locations as
-# the root of the jemalloc installation. The environment variable
-# JEMALLOC_HOME overrides this veriable.
-#
# This module defines
-# JEMALLOC_INCLUDE_DIR, directory containing headers
-# JEMALLOC_SHARED_LIB, path to libjemalloc.so/dylib
-# JEMALLOC_FOUND, whether jemalloc has been found
+# jemalloc::jemalloc, target to use jemalloc
-if(NOT "${JEMALLOC_HOME}" STREQUAL "")
- file(TO_CMAKE_PATH "${JEMALLOC_HOME}" _native_path)
- list(APPEND _jemalloc_roots ${_native_path})
-elseif(JEMALLOC_HOME)
- list(APPEND _jemalloc_roots ${JEMALLOC_HOME})
+if(ARROW_JEMALLOC_USE_SHARED)
+ set(jemalloc_LIB_NAMES)
+ if(CMAKE_IMPORT_LIBRARY_SUFFIX)
+ list(APPEND jemalloc_LIB_NAMES
+ "${CMAKE_IMPORT_LIBRARY_PREFIX}jemalloc${CMAKE_IMPORT_LIBRARY_SUFFIX}")
+ endif()
+ list(APPEND jemalloc_LIB_NAMES
+ "${CMAKE_SHARED_LIBRARY_PREFIX}jemalloc${CMAKE_SHARED_LIBRARY_SUFFIX}")
+else()
+ set(jemalloc_LIB_NAMES
+ "${CMAKE_STATIC_LIBRARY_PREFIX}jemalloc${CMAKE_STATIC_LIBRARY_SUFFIX}")
endif()
-set(LIBJEMALLOC_NAMES jemalloc libjemalloc.so.1 libjemalloc.so.2
libjemalloc.dylib)
-
-# Try the parameterized roots, if they exist
-if(_jemalloc_roots)
- find_path(JEMALLOC_INCLUDE_DIR
+if(jemalloc_ROOT)
+ find_library(jemalloc_LIB
+ NAMES ${jemalloc_LIB_NAMES}
+ PATHS ${jemalloc_ROOT}
+ PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES}
+ NO_DEFAULT_PATH)
+ find_path(jemalloc_INCLUDE_DIR
NAMES jemalloc/jemalloc.h
- PATHS ${_jemalloc_roots}
+ PATHS ${jemalloc_ROOT}
NO_DEFAULT_PATH
- PATH_SUFFIXES "include")
- find_library(JEMALLOC_SHARED_LIB
- NAMES ${LIBJEMALLOC_NAMES}
- PATHS ${_jemalloc_roots}
- NO_DEFAULT_PATH
- PATH_SUFFIXES "lib")
- find_library(JEMALLOC_STATIC_LIB
- NAMES jemalloc_pic
- PATHS ${_jemalloc_roots}
- NO_DEFAULT_PATH
- PATH_SUFFIXES "lib")
-else()
- find_path(JEMALLOC_INCLUDE_DIR NAMES jemalloc/jemalloc.h)
- message(STATUS ${JEMALLOC_INCLUDE_DIR})
- find_library(JEMALLOC_SHARED_LIB NAMES ${LIBJEMALLOC_NAMES})
- message(STATUS ${JEMALLOC_SHARED_LIB})
- find_library(JEMALLOC_STATIC_LIB NAMES jemalloc_pic)
- message(STATUS ${JEMALLOC_STATIC_LIB})
-endif()
+ PATH_SUFFIXES ${ARROW_INCLUDE_PATH_SUFFIXES})
-if(JEMALLOC_INCLUDE_DIR AND JEMALLOC_SHARED_LIB)
- set(JEMALLOC_FOUND TRUE)
else()
- set(JEMALLOC_FOUND FALSE)
+ find_package(PkgConfig QUIET)
+ pkg_check_modules(jemalloc_PC jemalloc)
+ if(jemalloc_PC_FOUND)
+ set(jemalloc_INCLUDE_DIR "${jemalloc_PC_INCLUDEDIR}")
+ list(APPEND jemalloc_PC_LIBRARY_DIRS "${jemalloc_PC_LIBDIR}")
+ find_library(jemalloc_LIB
+ NAMES ${jemalloc_LIB_NAMES}
+ PATHS ${jemalloc_PC_LIBRARY_DIRS}
+ NO_DEFAULT_PATH
+ PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES})
+ else()
+ find_library(jemalloc_LIB
+ NAMES ${jemalloc_LIB_NAMES}
+ PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES})
+ find_path(jemalloc_INCLUDE_DIR
+ NAMES jemalloc/jemalloc.h
+ PATH_SUFFIXES ${ARROW_INCLUDE_PATH_SUFFIXES})
+ endif()
endif()
-if(JEMALLOC_FOUND)
- if(NOT jemalloc_FIND_QUIETLY)
- message(STATUS "Found the jemalloc library: ${JEMALLOC_LIBRARIES}")
- endif()
-else()
- if(NOT jemalloc_FIND_QUIETLY)
- set(JEMALLOC_ERR_MSG "Could not find the jemalloc library. Looked in ")
- if(_jemalloc_roots)
- set(JEMALLOC_ERR_MSG "${JEMALLOC_ERR_MSG} in ${_jemalloc_roots}.")
+find_package_handle_standard_args(jemalloc REQUIRED_VARS jemalloc_LIB
+ jemalloc_INCLUDE_DIR)
+
+if(jemalloc_FOUND)
+ if(NOT TARGET jemalloc::jemalloc)
+ if(ARROW_JEMALLOC_USE_SHARED)
+ add_library(jemalloc::jemalloc SHARED IMPORTED)
else()
- set(JEMALLOC_ERR_MSG "${JEMALLOC_ERR_MSG} system search paths.")
+ add_library(jemalloc::jemalloc STATIC IMPORTED)
endif()
- if(jemalloc_FIND_REQUIRED)
- message(FATAL_ERROR "${JEMALLOC_ERR_MSG}")
- else(jemalloc_FIND_REQUIRED)
- message(STATUS "${JEMALLOC_ERR_MSG}")
- endif(jemalloc_FIND_REQUIRED)
+ set_target_properties(jemalloc::jemalloc
+ PROPERTIES IMPORTED_LOCATION "${jemalloc_LIB}"
+ INTERFACE_INCLUDE_DIRECTORIES
+ "${jemalloc_INCLUDE_DIR}")
endif()
endif()
-
-mark_as_advanced(JEMALLOC_INCLUDE_DIR JEMALLOC_SHARED_LIB)
diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake
b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index d57055f0ea..2abbb52b52 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -61,6 +61,7 @@ set(ARROW_THIRDPARTY_DEPENDENCIES
google_cloud_cpp_storage
gRPC
GTest
+ jemalloc
LLVM
lz4
nlohmann_json
@@ -102,6 +103,11 @@ if("${lz4_SOURCE}" STREQUAL "" AND NOT "${Lz4_SOURCE}"
STREQUAL "")
set(lz4_SOURCE ${Lz4_SOURCE})
endif()
+# For backward compatibility. We use bundled jemalloc by default.
+if("${jemalloc_SOURCE}" STREQUAL "")
+ set(jemalloc_SOURCE "BUNDLED")
+endif()
+
message(STATUS "Using ${ARROW_DEPENDENCY_SOURCE} approach to find
dependencies")
if(ARROW_DEPENDENCY_SOURCE STREQUAL "CONDA")
@@ -162,6 +168,8 @@ macro(build_dependency DEPENDENCY_NAME)
build_grpc()
elseif("${DEPENDENCY_NAME}" STREQUAL "GTest")
build_gtest()
+ elseif("${DEPENDENCY_NAME}" STREQUAL "jemalloc")
+ build_jemalloc()
elseif("${DEPENDENCY_NAME}" STREQUAL "lz4")
build_lz4()
elseif("${DEPENDENCY_NAME}" STREQUAL "nlohmann_json")
@@ -1755,13 +1763,12 @@ endif()
# ----------------------------------------------------------------------
# jemalloc - Unix-only high-performance allocator
-if(ARROW_JEMALLOC)
- message(STATUS "Building (vendored) jemalloc from source")
- # We only use a vendored jemalloc as we want to control its version.
- # Also our build of jemalloc is specially prefixed so that it will not
+macro(build_jemalloc)
+ # Our build of jemalloc is specially prefixed so that it will not
# conflict with the default allocator as well as other jemalloc
# installations.
- # find_package(jemalloc)
+
+ message(STATUS "Building jemalloc from source")
set(ARROW_JEMALLOC_USE_SHARED OFF)
set(JEMALLOC_PREFIX
@@ -1816,15 +1823,23 @@ if(ARROW_JEMALLOC)
set(JEMALLOC_INCLUDE_DIR
"${CMAKE_CURRENT_BINARY_DIR}/jemalloc_ep-prefix/src/")
# The include directory must exist before it is referenced by a target.
file(MAKE_DIRECTORY "${JEMALLOC_INCLUDE_DIR}")
- add_library(jemalloc::jemalloc STATIC IMPORTED)
- set_target_properties(jemalloc::jemalloc
+ add_library(jemalloc STATIC IMPORTED)
+ set_target_properties(jemalloc
PROPERTIES INTERFACE_LINK_LIBRARIES Threads::Threads
IMPORTED_LOCATION "${JEMALLOC_STATIC_LIB}"
INTERFACE_INCLUDE_DIRECTORIES
"${JEMALLOC_INCLUDE_DIR}")
- add_dependencies(jemalloc::jemalloc jemalloc_ep)
+ add_dependencies(jemalloc jemalloc_ep)
+
+ list(APPEND ARROW_BUNDLED_STATIC_LIBS jemalloc)
- list(APPEND ARROW_BUNDLED_STATIC_LIBS jemalloc::jemalloc)
+ set(jemalloc_VENDORED TRUE)
+ # For config.h.cmake
+ set(ARROW_JEMALLOC_VENDORED ${jemalloc_VENDORED})
+endmacro()
+
+if(ARROW_JEMALLOC)
+ resolve_dependency(jemalloc)
endif()
# ----------------------------------------------------------------------
@@ -1882,6 +1897,8 @@ if(ARROW_MIMALLOC)
add_dependencies(toolchain mimalloc_ep)
list(APPEND ARROW_BUNDLED_STATIC_LIBS mimalloc::mimalloc)
+
+ set(mimalloc_VENDORED TRUE)
endif()
# ----------------------------------------------------------------------
diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index 15d8574c6e..ccf643bbc5 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -239,6 +239,10 @@ set(ARROW_SRCS
vendored/double-conversion/diy-fp.cc
vendored/double-conversion/strtod.cc)
+if(ARROW_JEMALLOC)
+ list(APPEND ARROW_SRCS memory_pool_jemalloc.cc)
+endif()
+
append_avx2_src(util/bpacking_avx2.cc)
append_avx512_src(util/bpacking_avx512.cc)
@@ -325,10 +329,10 @@ set(ARROW_TESTING_SRCS
# (see https://gitlab.kitware.com/cmake/cmake/issues/19677)
set(_allocator_dependencies "") # Empty list
-if(ARROW_JEMALLOC)
+if(jemalloc_VENDORED)
list(APPEND _allocator_dependencies jemalloc_ep)
endif()
-if(ARROW_MIMALLOC)
+if(mimalloc_VENDORED)
list(APPEND _allocator_dependencies mimalloc_ep)
endif()
diff --git a/cpp/src/arrow/memory_pool.cc b/cpp/src/arrow/memory_pool.cc
index 9130b29748..f8682ad313 100644
--- a/cpp/src/arrow/memory_pool.cc
+++ b/cpp/src/arrow/memory_pool.cc
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
-#include "arrow/memory_pool.h"
+#include "arrow/memory_pool_internal.h"
#include <algorithm> // IWYU pragma: keep
#include <atomic>
@@ -35,6 +35,7 @@
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/util/bit_util.h"
+#include "arrow/util/config.h"
#include "arrow/util/debug.h"
#include "arrow/util/int_util_overflow.h"
#include "arrow/util/io_util.h"
@@ -48,67 +49,24 @@
#include <malloc.h>
#endif
-#ifdef ARROW_JEMALLOC
-// Needed to support jemalloc 3 and 4
-#define JEMALLOC_MANGLE
-// Explicitly link to our version of jemalloc
-#include "jemalloc_ep/dist/include/jemalloc/jemalloc.h"
-#endif
-
#ifdef ARROW_MIMALLOC
#include <mimalloc.h>
#endif
-#ifdef ARROW_JEMALLOC
+namespace arrow {
-// Compile-time configuration for jemalloc options.
-// Note the prefix ("je_arrow_") must match the symbol prefix given when
-// building jemalloc.
-// See discussion in https://github.com/jemalloc/jemalloc/issues/1621
-
-// ARROW-6910(wesm): we found that jemalloc's default behavior with respect to
-// dirty / muzzy pages (see definitions of these in the jemalloc documentation)
-// conflicted with user expectations, and would even cause memory use problems
-// in some cases. By enabling the background_thread option and reducing the
-// decay time from 10 seconds to 1 seconds, memory is released more
-// aggressively (and in the background) to the OS. This can be configured
-// further by using the arrow::jemalloc_set_decay_ms API
-
-#undef USE_JEMALLOC_BACKGROUND_THREAD
-#ifndef __APPLE__
-// ARROW-6977: jemalloc's background_thread isn't always enabled on macOS
-#define USE_JEMALLOC_BACKGROUND_THREAD
-#endif
+namespace memory_pool {
-// In debug mode, add memory poisoning on alloc / free
-#ifdef NDEBUG
-#define JEMALLOC_DEBUG_OPTIONS ""
-#else
-#define JEMALLOC_DEBUG_OPTIONS ",junk:true"
-#endif
+namespace internal {
-const char* je_arrow_malloc_conf =
- ("oversize_threshold:0"
-#ifdef USE_JEMALLOC_BACKGROUND_THREAD
- ",dirty_decay_ms:1000"
- ",muzzy_decay_ms:1000"
- ",background_thread:true"
-#else
- // ARROW-6994: return memory immediately to the OS if the
- // background_thread option isn't available
- ",dirty_decay_ms:0"
- ",muzzy_decay_ms:0"
-#endif
- JEMALLOC_DEBUG_OPTIONS); // NOLINT: whitespace/parens
+alignas(kAlignment) int64_t zero_size_area[1] = {kDebugXorSuffix};
-#endif // ARROW_JEMALLOC
+} // namespace internal
-namespace arrow {
+} // namespace memory_pool
namespace {
-constexpr size_t kAlignment = 64;
-
constexpr char kDefaultBackendEnvVar[] = "ARROW_DEFAULT_MEMORY_POOL";
constexpr char kDebugMemoryEnvVar[] = "ARROW_DEBUG_MEMORY_POOL";
@@ -188,14 +146,6 @@ MemoryPoolBackend DefaultBackend() {
return default_backend.backend;
}
-static constexpr int64_t kDebugXorSuffix = -0x181fe80e0b464188LL;
-
-// A static piece of memory for 0-size allocations, so as to return
-// an aligned non-null pointer. Note the correct value for DebugAllocator
-// checks is hardcoded.
-alignas(kAlignment) static int64_t zero_size_area[1] = {kDebugXorSuffix};
-static uint8_t* const kZeroSizeArea =
reinterpret_cast<uint8_t*>(&zero_size_area);
-
using MemoryDebugHandler = std::function<void(uint8_t* ptr, int64_t size,
const Status&)>;
struct DebugState {
@@ -276,7 +226,7 @@ class DebugAllocator {
public:
static Status AllocateAligned(int64_t size, uint8_t** out) {
if (size == 0) {
- *out = kZeroSizeArea;
+ *out = memory_pool::internal::kZeroSizeArea;
} else {
ARROW_ASSIGN_OR_RAISE(int64_t raw_size, RawSize(size));
RETURN_NOT_OK(WrappedAllocator::AllocateAligned(raw_size, out));
@@ -289,14 +239,14 @@ class DebugAllocator {
static Status ReallocateAligned(int64_t old_size, int64_t new_size,
uint8_t** ptr) {
CheckAllocatedArea(*ptr, old_size, "reallocation");
- if (*ptr == kZeroSizeArea) {
+ if (*ptr == memory_pool::internal::kZeroSizeArea) {
return AllocateAligned(new_size, ptr);
}
if (new_size == 0) {
// Note that an overflow check isn't needed as `old_size` is supposed to
have
// been successfully passed to AllocateAligned() before.
WrappedAllocator::DeallocateAligned(*ptr, old_size + kOverhead);
- *ptr = kZeroSizeArea;
+ *ptr = memory_pool::internal::kZeroSizeArea;
return Status::OK();
}
ARROW_ASSIGN_OR_RAISE(int64_t raw_new_size, RawSize(new_size));
@@ -308,7 +258,7 @@ class DebugAllocator {
static void DeallocateAligned(uint8_t* ptr, int64_t size) {
CheckAllocatedArea(ptr, size, "deallocation");
- if (ptr != kZeroSizeArea) {
+ if (ptr != memory_pool::internal::kZeroSizeArea) {
WrappedAllocator::DeallocateAligned(ptr, size + kOverhead);
}
}
@@ -323,12 +273,13 @@ class DebugAllocator {
static void InitAllocatedArea(uint8_t* ptr, int64_t size) {
DCHECK_NE(size, 0);
- util::SafeStore(ptr + size, size ^ kDebugXorSuffix);
+ util::SafeStore(ptr + size, size ^ memory_pool::internal::kDebugXorSuffix);
}
static void CheckAllocatedArea(uint8_t* ptr, int64_t size, const char*
context) {
// Check that memory wasn't clobbered at the end of the allocated area.
- int64_t stored_size = kDebugXorSuffix ^ util::SafeLoadAs<int64_t>(ptr +
size);
+ int64_t stored_size =
+ memory_pool::internal::kDebugXorSuffix ^ util::SafeLoadAs<int64_t>(ptr + size);
if (ARROW_PREDICT_FALSE(stored_size != size)) {
auto st = Status::Invalid("Wrong size on ", context, ": given size = ",
size,
", actual size = ", stored_size);
@@ -346,30 +297,33 @@ class SystemAllocator {
// (as of May 2016 64 bytes)
static Status AllocateAligned(int64_t size, uint8_t** out) {
if (size == 0) {
- *out = kZeroSizeArea;
+ *out = memory_pool::internal::kZeroSizeArea;
return Status::OK();
}
#ifdef _WIN32
// Special code path for Windows
*out = reinterpret_cast<uint8_t*>(
- _aligned_malloc(static_cast<size_t>(size), kAlignment));
+ _aligned_malloc(static_cast<size_t>(size), memory_pool::internal::kAlignment));
if (!*out) {
return Status::OutOfMemory("malloc of size ", size, " failed");
}
#elif defined(sun) || defined(__sun)
- *out = reinterpret_cast<uint8_t*>(memalign(kAlignment,
static_cast<size_t>(size)));
+ *out = reinterpret_cast<uint8_t*>(
+ memalign(memory_pool::internal::kAlignment, static_cast<size_t>(size)));
if (!*out) {
return Status::OutOfMemory("malloc of size ", size, " failed");
}
#else
- const int result = posix_memalign(reinterpret_cast<void**>(out),
kAlignment,
- static_cast<size_t>(size));
+ const int result =
+ posix_memalign(reinterpret_cast<void**>(out), memory_pool::internal::kAlignment,
+ static_cast<size_t>(size));
if (result == ENOMEM) {
return Status::OutOfMemory("malloc of size ", size, " failed");
}
if (result == EINVAL) {
- return Status::Invalid("invalid alignment parameter: ", kAlignment);
+ return Status::Invalid("invalid alignment parameter: ",
+ memory_pool::internal::kAlignment);
}
#endif
return Status::OK();
@@ -377,13 +331,13 @@ class SystemAllocator {
static Status ReallocateAligned(int64_t old_size, int64_t new_size,
uint8_t** ptr) {
uint8_t* previous_ptr = *ptr;
- if (previous_ptr == kZeroSizeArea) {
+ if (previous_ptr == memory_pool::internal::kZeroSizeArea) {
DCHECK_EQ(old_size, 0);
return AllocateAligned(new_size, ptr);
}
if (new_size == 0) {
DeallocateAligned(previous_ptr, old_size);
- *ptr = kZeroSizeArea;
+ *ptr = memory_pool::internal::kZeroSizeArea;
return Status::OK();
}
// Note: We cannot use realloc() here as it doesn't guarantee alignment.
@@ -404,7 +358,7 @@ class SystemAllocator {
}
static void DeallocateAligned(uint8_t* ptr, int64_t size) {
- if (ptr == kZeroSizeArea) {
+ if (ptr == memory_pool::internal::kZeroSizeArea) {
DCHECK_EQ(size, 0);
} else {
#ifdef _WIN32
@@ -424,59 +378,6 @@ class SystemAllocator {
}
};
-#ifdef ARROW_JEMALLOC
-
-// Helper class directing allocations to the jemalloc allocator.
-class JemallocAllocator {
- public:
- static Status AllocateAligned(int64_t size, uint8_t** out) {
- if (size == 0) {
- *out = kZeroSizeArea;
- return Status::OK();
- }
- *out = reinterpret_cast<uint8_t*>(
- mallocx(static_cast<size_t>(size), MALLOCX_ALIGN(kAlignment)));
- if (*out == NULL) {
- return Status::OutOfMemory("malloc of size ", size, " failed");
- }
- return Status::OK();
- }
-
- static Status ReallocateAligned(int64_t old_size, int64_t new_size,
uint8_t** ptr) {
- uint8_t* previous_ptr = *ptr;
- if (previous_ptr == kZeroSizeArea) {
- DCHECK_EQ(old_size, 0);
- return AllocateAligned(new_size, ptr);
- }
- if (new_size == 0) {
- DeallocateAligned(previous_ptr, old_size);
- *ptr = kZeroSizeArea;
- return Status::OK();
- }
- *ptr = reinterpret_cast<uint8_t*>(
- rallocx(*ptr, static_cast<size_t>(new_size),
MALLOCX_ALIGN(kAlignment)));
- if (*ptr == NULL) {
- *ptr = previous_ptr;
- return Status::OutOfMemory("realloc of size ", new_size, " failed");
- }
- return Status::OK();
- }
-
- static void DeallocateAligned(uint8_t* ptr, int64_t size) {
- if (ptr == kZeroSizeArea) {
- DCHECK_EQ(size, 0);
- } else {
- dallocx(ptr, MALLOCX_ALIGN(kAlignment));
- }
- }
-
- static void ReleaseUnused() {
- mallctl("arena." ARROW_STRINGIFY(MALLCTL_ARENAS_ALL) ".purge", NULL, NULL,
NULL, 0);
- }
-};
-
-#endif // defined(ARROW_JEMALLOC)
-
#ifdef ARROW_MIMALLOC
// Helper class directing allocations to the mimalloc allocator.
@@ -484,11 +385,11 @@ class MimallocAllocator {
public:
static Status AllocateAligned(int64_t size, uint8_t** out) {
if (size == 0) {
- *out = kZeroSizeArea;
+ *out = memory_pool::internal::kZeroSizeArea;
return Status::OK();
}
*out = reinterpret_cast<uint8_t*>(
- mi_malloc_aligned(static_cast<size_t>(size), kAlignment));
+ mi_malloc_aligned(static_cast<size_t>(size), memory_pool::internal::kAlignment));
if (*out == NULL) {
return Status::OutOfMemory("malloc of size ", size, " failed");
}
@@ -499,17 +400,17 @@ class MimallocAllocator {
static Status ReallocateAligned(int64_t old_size, int64_t new_size,
uint8_t** ptr) {
uint8_t* previous_ptr = *ptr;
- if (previous_ptr == kZeroSizeArea) {
+ if (previous_ptr == memory_pool::internal::kZeroSizeArea) {
DCHECK_EQ(old_size, 0);
return AllocateAligned(new_size, ptr);
}
if (new_size == 0) {
DeallocateAligned(previous_ptr, old_size);
- *ptr = kZeroSizeArea;
+ *ptr = memory_pool::internal::kZeroSizeArea;
return Status::OK();
}
- *ptr = reinterpret_cast<uint8_t*>(
- mi_realloc_aligned(previous_ptr, static_cast<size_t>(new_size),
kAlignment));
+ *ptr = reinterpret_cast<uint8_t*>(mi_realloc_aligned(
+ previous_ptr, static_cast<size_t>(new_size), memory_pool::internal::kAlignment));
if (*ptr == NULL) {
*ptr = previous_ptr;
return Status::OutOfMemory("realloc of size ", new_size, " failed");
@@ -518,7 +419,7 @@ class MimallocAllocator {
}
static void DeallocateAligned(uint8_t* ptr, int64_t size) {
- if (ptr == kZeroSizeArea) {
+ if (ptr == memory_pool::internal::kZeroSizeArea) {
DCHECK_EQ(size, 0);
} else {
mi_free(ptr);
@@ -624,13 +525,15 @@ class SystemDebugMemoryPool : public
BaseMemoryPoolImpl<DebugAllocator<SystemAll
};
#ifdef ARROW_JEMALLOC
-class JemallocMemoryPool : public BaseMemoryPoolImpl<JemallocAllocator> {
+class JemallocMemoryPool
+ : public BaseMemoryPoolImpl<memory_pool::internal::JemallocAllocator> {
public:
std::string backend_name() const override { return "jemalloc"; }
};
class JemallocDebugMemoryPool
- : public BaseMemoryPoolImpl<DebugAllocator<JemallocAllocator>> {
+ : public BaseMemoryPoolImpl<
+ DebugAllocator<memory_pool::internal::JemallocAllocator>> {
public:
std::string backend_name() const override { return "jemalloc"; }
};
@@ -758,29 +661,11 @@ MemoryPool* default_memory_pool() {
}
}
-#define RETURN_IF_JEMALLOC_ERROR(ERR) \
- do { \
- if (err != 0) { \
- return Status::UnknownError(std::strerror(ERR)); \
- } \
- } while (0)
-
+#ifndef ARROW_JEMALLOC
Status jemalloc_set_decay_ms(int ms) {
-#ifdef ARROW_JEMALLOC
- ssize_t decay_time_ms = static_cast<ssize_t>(ms);
-
- int err = mallctl("arenas.dirty_decay_ms", nullptr, nullptr, &decay_time_ms,
- sizeof(decay_time_ms));
- RETURN_IF_JEMALLOC_ERROR(err);
- err = mallctl("arenas.muzzy_decay_ms", nullptr, nullptr, &decay_time_ms,
- sizeof(decay_time_ms));
- RETURN_IF_JEMALLOC_ERROR(err);
-
- return Status::OK();
-#else
return Status::Invalid("jemalloc support is not built");
-#endif
}
+#endif
///////////////////////////////////////////////////////////////////////
// LoggingMemoryPool implementation
diff --git a/cpp/src/arrow/memory_pool_internal.h
b/cpp/src/arrow/memory_pool_internal.h
new file mode 100644
index 0000000000..df0ee646a4
--- /dev/null
+++ b/cpp/src/arrow/memory_pool_internal.h
@@ -0,0 +1,56 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "arrow/memory_pool.h"
+#include "arrow/util/config.h"
+
+namespace arrow {
+
+namespace memory_pool {
+
+namespace internal {
+
+static constexpr size_t kAlignment = 64;
+
+static constexpr int64_t kDebugXorSuffix = -0x181fe80e0b464188LL;
+
+// A static piece of memory for 0-size allocations, so as to return
+// an aligned non-null pointer. Note the correct value for DebugAllocator
+// checks is hardcoded.
+extern int64_t zero_size_area[1];
+static uint8_t* const kZeroSizeArea = reinterpret_cast<uint8_t*>(&zero_size_area);
+
+#ifdef ARROW_JEMALLOC
+
+// Helper class directing allocations to the jemalloc allocator.
+class JemallocAllocator {
+ public:
+ static Status AllocateAligned(int64_t size, uint8_t** out);
+ static Status ReallocateAligned(int64_t old_size, int64_t new_size, uint8_t** ptr);
+ static void DeallocateAligned(uint8_t* ptr, int64_t size);
+ static void ReleaseUnused();
+};
+
+#endif // defined(ARROW_JEMALLOC)
+
+} // namespace internal
+
+} // namespace memory_pool
+
+} // namespace arrow
diff --git a/cpp/src/arrow/memory_pool_jemalloc.cc
b/cpp/src/arrow/memory_pool_jemalloc.cc
new file mode 100644
index 0000000000..48a5bac137
--- /dev/null
+++ b/cpp/src/arrow/memory_pool_jemalloc.cc
@@ -0,0 +1,156 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/memory_pool_internal.h"
+#include "arrow/util/logging.h" // IWYU pragma: keep
+
+// We can't put the jemalloc memory pool implementation into
+// memory_pool.cc because jemalloc.h may redefine malloc() and its
+// family by macros. If malloc() and its family are redefined by
+// jemalloc, our system memory pool will also use jemalloc's malloc() and
+// its family.
+
+#ifdef ARROW_JEMALLOC_VENDORED
+#define JEMALLOC_MANGLE
+// Explicitly link to our version of jemalloc
+#include "jemalloc_ep/dist/include/jemalloc/jemalloc.h"
+#else
+#include <jemalloc/jemalloc.h>
+#endif
+
+#ifdef ARROW_JEMALLOC_VENDORED
+// Compile-time configuration for vendored jemalloc options.
+// Note the prefix ("je_arrow_") must match the symbol prefix given when
+// building jemalloc.
+// See discussion in https://github.com/jemalloc/jemalloc/issues/1621
+
+// ARROW-6910(wesm): we found that jemalloc's default behavior with respect to
+// dirty / muzzy pages (see definitions of these in the jemalloc documentation)
+// conflicted with user expectations, and would even cause memory use problems
+// in some cases. By enabling the background_thread option and reducing the
+// decay time from 10 seconds to 1 second, memory is released more
+// aggressively (and in the background) to the OS. This can be configured
+// further by using the arrow::jemalloc_set_decay_ms API
+
+#undef USE_JEMALLOC_BACKGROUND_THREAD
+#ifndef __APPLE__
+// ARROW-6977: jemalloc's background_thread isn't always enabled on macOS
+#define USE_JEMALLOC_BACKGROUND_THREAD
+#endif
+
+// In debug mode, add memory poisoning on alloc / free
+#ifdef NDEBUG
+#define JEMALLOC_DEBUG_OPTIONS ""
+#else
+#define JEMALLOC_DEBUG_OPTIONS ",junk:true"
+#endif
+
+const char* je_arrow_malloc_conf =
+ ("oversize_threshold:0"
+#ifdef USE_JEMALLOC_BACKGROUND_THREAD
+ ",dirty_decay_ms:1000"
+ ",muzzy_decay_ms:1000"
+ ",background_thread:true"
+#else
+ // ARROW-6994: return memory immediately to the OS if the
+ // background_thread option isn't available
+ ",dirty_decay_ms:0"
+ ",muzzy_decay_ms:0"
+#endif
+ JEMALLOC_DEBUG_OPTIONS); // NOLINT: whitespace/parens
+
+#endif // ARROW_JEMALLOC_VENDORED
+
+namespace arrow {
+
+namespace memory_pool {
+
+namespace internal {
+
+Status JemallocAllocator::AllocateAligned(int64_t size, uint8_t** out) {
+ if (size == 0) {
+ *out = kZeroSizeArea;
+ return Status::OK();
+ }
+ *out = reinterpret_cast<uint8_t*>(
+ mallocx(static_cast<size_t>(size), MALLOCX_ALIGN(kAlignment)));
+ if (*out == NULL) {
+ return Status::OutOfMemory("malloc of size ", size, " failed");
+ }
+ return Status::OK();
+}
+
+Status JemallocAllocator::ReallocateAligned(int64_t old_size, int64_t new_size,
+ uint8_t** ptr) {
+ uint8_t* previous_ptr = *ptr;
+ if (previous_ptr == kZeroSizeArea) {
+ DCHECK_EQ(old_size, 0);
+ return AllocateAligned(new_size, ptr);
+ }
+ if (new_size == 0) {
+ DeallocateAligned(previous_ptr, old_size);
+ *ptr = kZeroSizeArea;
+ return Status::OK();
+ }
+ *ptr = reinterpret_cast<uint8_t*>(
+ rallocx(*ptr, static_cast<size_t>(new_size), MALLOCX_ALIGN(kAlignment)));
+ if (*ptr == NULL) {
+ *ptr = previous_ptr;
+ return Status::OutOfMemory("realloc of size ", new_size, " failed");
+ }
+ return Status::OK();
+}
+
+void JemallocAllocator::DeallocateAligned(uint8_t* ptr, int64_t size) {
+ if (ptr == kZeroSizeArea) {
+ DCHECK_EQ(size, 0);
+ } else {
+ dallocx(ptr, MALLOCX_ALIGN(kAlignment));
+ }
+}
+
+void JemallocAllocator::ReleaseUnused() {
+ mallctl("arena." ARROW_STRINGIFY(MALLCTL_ARENAS_ALL) ".purge", NULL, NULL, NULL, 0);
+}
+
+} // namespace internal
+
+} // namespace memory_pool
+
+#define RETURN_IF_JEMALLOC_ERROR(ERR) \
+ do { \
+ if ((ERR) != 0) { \
+ return Status::UnknownError(std::strerror(ERR)); \
+ } \
+ } while (0)
+
+Status jemalloc_set_decay_ms(int ms) {
+ ssize_t decay_time_ms = static_cast<ssize_t>(ms);
+
+ int err = mallctl("arenas.dirty_decay_ms", nullptr, nullptr, &decay_time_ms,
+ sizeof(decay_time_ms));
+ RETURN_IF_JEMALLOC_ERROR(err);
+ err = mallctl("arenas.muzzy_decay_ms", nullptr, nullptr, &decay_time_ms,
+ sizeof(decay_time_ms));
+ RETURN_IF_JEMALLOC_ERROR(err);
+
+ return Status::OK();
+}
+
+#undef RETURN_IF_JEMALLOC_ERROR
+
+} // namespace arrow
diff --git a/cpp/src/arrow/memory_pool_test.cc
b/cpp/src/arrow/memory_pool_test.cc
index 3ea35165f2..591d86a23f 100644
--- a/cpp/src/arrow/memory_pool_test.cc
+++ b/cpp/src/arrow/memory_pool_test.cc
@@ -24,6 +24,7 @@
#include "arrow/memory_pool_test.h"
#include "arrow/status.h"
#include "arrow/testing/gtest_util.h"
+#include "arrow/util/config.h"
namespace arrow {
diff --git a/cpp/src/arrow/util/config.h.cmake
b/cpp/src/arrow/util/config.h.cmake
index bd6447a20e..c4a2a40be2 100644
--- a/cpp/src/arrow/util/config.h.cmake
+++ b/cpp/src/arrow/util/config.h.cmake
@@ -43,6 +43,8 @@
#cmakedefine ARROW_FILESYSTEM
#cmakedefine ARROW_FLIGHT
#cmakedefine ARROW_IPC
+#cmakedefine ARROW_JEMALLOC
+#cmakedefine ARROW_JEMALLOC_VENDORED
#cmakedefine ARROW_JSON
#cmakedefine ARROW_GCS
diff --git
a/dev/tasks/linux-packages/apache-arrow/apt/debian-bookworm/Dockerfile
b/dev/tasks/linux-packages/apache-arrow/apt/debian-bookworm/Dockerfile
index 152ac08a7c..42a7327614 100644
--- a/dev/tasks/linux-packages/apache-arrow/apt/debian-bookworm/Dockerfile
+++ b/dev/tasks/linux-packages/apache-arrow/apt/debian-bookworm/Dockerfile
@@ -53,6 +53,7 @@ RUN \
libgoogle-glog-dev \
libgrpc++-dev \
libgtest-dev \
+ libjemalloc-dev \
liblz4-dev \
libprotoc-dev \
libprotobuf-dev \
diff --git
a/dev/tasks/linux-packages/apache-arrow/apt/debian-bullseye/Dockerfile
b/dev/tasks/linux-packages/apache-arrow/apt/debian-bullseye/Dockerfile
index 440a07f09c..1562f1d662 100644
--- a/dev/tasks/linux-packages/apache-arrow/apt/debian-bullseye/Dockerfile
+++ b/dev/tasks/linux-packages/apache-arrow/apt/debian-bullseye/Dockerfile
@@ -53,6 +53,7 @@ RUN \
libgoogle-glog-dev \
libgrpc++-dev \
libgtest-dev \
+ libjemalloc-dev \
liblz4-dev \
libprotoc-dev \
libprotobuf-dev \
diff --git a/dev/tasks/linux-packages/apache-arrow/apt/debian-buster/Dockerfile
b/dev/tasks/linux-packages/apache-arrow/apt/debian-buster/Dockerfile
index 83d0089500..ac6ddbe07e 100644
--- a/dev/tasks/linux-packages/apache-arrow/apt/debian-buster/Dockerfile
+++ b/dev/tasks/linux-packages/apache-arrow/apt/debian-buster/Dockerfile
@@ -55,6 +55,7 @@ RUN \
libgmock-dev \
libgoogle-glog-dev \
libgtest-dev \
+ libjemalloc-dev \
liblz4-dev \
libre2-dev \
libsnappy-dev \
diff --git a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-bionic/Dockerfile
b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-bionic/Dockerfile
index b897fa73a3..64ebb3b72d 100644
--- a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-bionic/Dockerfile
+++ b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-bionic/Dockerfile
@@ -48,6 +48,7 @@ RUN \
libglib2.0-doc \
libgoogle-glog-dev \
libgtest-dev \
+ libjemalloc-dev \
liblz4-dev \
libre2-dev \
libsnappy-dev \
diff --git a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-focal/Dockerfile
b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-focal/Dockerfile
index 5abee7c2e0..9efc560268 100644
--- a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-focal/Dockerfile
+++ b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-focal/Dockerfile
@@ -50,6 +50,7 @@ RUN \
libgmock-dev \
libgoogle-glog-dev \
libgtest-dev \
+ libjemalloc-dev \
liblz4-dev \
libre2-dev \
libsnappy-dev \
diff --git a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-impish/Dockerfile
b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-impish/Dockerfile
index 0959ba5a2a..1b5b3be850 100644
--- a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-impish/Dockerfile
+++ b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-impish/Dockerfile
@@ -51,6 +51,7 @@ RUN \
libgoogle-glog-dev \
libgrpc++-dev \
libgtest-dev \
+ libjemalloc-dev \
liblz4-dev \
libprotoc-dev \
libprotobuf-dev \
diff --git a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-jammy/Dockerfile
b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-jammy/Dockerfile
index 7e95c37642..e6f657533f 100644
--- a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-jammy/Dockerfile
+++ b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-jammy/Dockerfile
@@ -51,6 +51,7 @@ RUN \
libgoogle-glog-dev \
libgrpc++-dev \
libgtest-dev \
+ libjemalloc-dev \
liblz4-dev \
libprotoc-dev \
libprotobuf-dev \
diff --git a/dev/tasks/linux-packages/apache-arrow/debian/control.in
b/dev/tasks/linux-packages/apache-arrow/debian/control.in
index 27c3e0e31c..6e600e5084 100644
--- a/dev/tasks/linux-packages/apache-arrow/debian/control.in
+++ b/dev/tasks/linux-packages/apache-arrow/debian/control.in
@@ -18,6 +18,7 @@ Build-Depends:
libgoogle-glog-dev,
@USE_SYSTEM_GRPC@ libgrpc++-dev,
libgtest-dev,
+ libjemalloc-dev,
liblz4-dev,
@USE_SYSTEM_GRPC@ libprotoc-dev,
@USE_SYSTEM_GRPC@ libprotobuf-dev,
@@ -133,6 +134,7 @@ Depends:
libbz2-dev,
@USE_SYSTEM_C_ARES@ libc-ares-dev,
@USE_SYSTEM_GRPC@ libgrpc++-dev,
+ libjemalloc-dev,
liblz4-dev,
libre2-dev,
libsnappy-dev,
diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dev.install
b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dev.install
index 4f600692ce..26de6f7748 100644
--- a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dev.install
+++ b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dev.install
@@ -5,6 +5,7 @@ usr/lib/*/cmake/arrow/ArrowTargets*.cmake
usr/lib/*/cmake/arrow/Find*Alt.cmake
usr/lib/*/cmake/arrow/FindArrow.cmake
usr/lib/*/cmake/arrow/FindBrotli.cmake
+usr/lib/*/cmake/arrow/Findjemalloc.cmake
usr/lib/*/cmake/arrow/Find[STuz]*.cmake
usr/lib/*/cmake/arrow/arrow-config.cmake
usr/lib/*/libarrow.a
diff --git a/dev/tasks/linux-packages/apache-arrow/debian/rules
b/dev/tasks/linux-packages/apache-arrow/debian/rules
index e6845ac5df..901ead88ac 100755
--- a/dev/tasks/linux-packages/apache-arrow/debian/rules
+++ b/dev/tasks/linux-packages/apache-arrow/debian/rules
@@ -61,7 +61,8 @@ override_dh_auto_configure:
-DCUDAToolkit_ROOT=/usr \
-DPARQUET_REQUIRE_ENCRYPTION=ON \
-DPythonInterp_FIND_VERSION=ON \
- -DPythonInterp_FIND_VERSION_MAJOR=3
+ -DPythonInterp_FIND_VERSION_MAJOR=3 \
+ -Djemalloc_SOURCE=SYSTEM
override_dh_auto_build:
dh_auto_build \
diff --git a/dev/tasks/linux-packages/apache-arrow/yum/almalinux-8/Dockerfile
b/dev/tasks/linux-packages/apache-arrow/yum/almalinux-8/Dockerfile
index a75fd022bd..0a37bab4b1 100644
--- a/dev/tasks/linux-packages/apache-arrow/yum/almalinux-8/Dockerfile
+++ b/dev/tasks/linux-packages/apache-arrow/yum/almalinux-8/Dockerfile
@@ -40,6 +40,7 @@ RUN \
glog-devel \
gobject-introspection-devel \
gtk-doc \
+ jemalloc-devel \
json-devel \
libarchive \
libzstd-devel \
diff --git a/dev/tasks/linux-packages/apache-arrow/yum/almalinux-9/Dockerfile
b/dev/tasks/linux-packages/apache-arrow/yum/almalinux-9/Dockerfile
index df63f7a1a7..8561779f65 100644
--- a/dev/tasks/linux-packages/apache-arrow/yum/almalinux-9/Dockerfile
+++ b/dev/tasks/linux-packages/apache-arrow/yum/almalinux-9/Dockerfile
@@ -39,6 +39,7 @@ RUN \
git \
gobject-introspection-devel \
gtk-doc \
+ jemalloc-devel \
json-devel \
libarchive \
libzstd-devel \
diff --git
a/dev/tasks/linux-packages/apache-arrow/yum/amazon-linux-2/Dockerfile
b/dev/tasks/linux-packages/apache-arrow/yum/amazon-linux-2/Dockerfile
index 9409b25c3c..b8de7062c2 100644
--- a/dev/tasks/linux-packages/apache-arrow/yum/amazon-linux-2/Dockerfile
+++ b/dev/tasks/linux-packages/apache-arrow/yum/amazon-linux-2/Dockerfile
@@ -39,6 +39,7 @@ RUN \
glog-devel \
gobject-introspection-devel \
gtk-doc \
+ jemalloc-devel \
json-devel \
lz4-devel \
make \
diff --git a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in
b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in
index d5bbb93384..449c767cd4 100644
--- a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in
+++ b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in
@@ -103,6 +103,7 @@ BuildRequires: git
%if %{use_glog}
BuildRequires: glog-devel
%endif
+BuildRequires: jemalloc-devel
%if %{use_gcs}
BuildRequires: json-devel
%endif
@@ -177,6 +178,7 @@ cd cpp
%if %{use_s3}
-DARROW_S3=ON \
%endif
+ -DARROW_USE_CCACHE=OFF \
-DARROW_WITH_BROTLI=ON \
-DARROW_WITH_BZ2=ON \
-DARROW_WITH_LZ4=ON \
@@ -184,12 +186,12 @@ cd cpp
-DARROW_WITH_ZLIB=ON \
-DARROW_WITH_ZSTD=ON \
-DCMAKE_BUILD_TYPE=$cpp_build_type \
- -DARROW_USE_CCACHE=OFF \
-DPARQUET_REQUIRE_ENCRYPTION=ON \
%if %{use_python}
-DPythonInterp_FIND_VERSION=ON \
-DPythonInterp_FIND_VERSION_MAJOR=3 \
%endif
+ -Djemalloc_SOURCE=SYSTEM \
-G"Unix Makefiles"
%arrow_cmake_build
cd -
@@ -229,6 +231,7 @@ Requires: gflags
%if %{use_glog}
Requires: glog
%endif
+Requires: jemalloc
%if %{have_zstd}
Requires: libzstd
%endif
@@ -260,6 +263,7 @@ Requires: bzip2-devel
%if %{use_flight}
Requires: c-ares-devel
%endif
+Requires: jemalloc-devel
%if %{use_gcs}
Requires: json-devel
%endif
@@ -304,6 +308,7 @@ Libraries and header files for Apache Arrow C++.
%if %{use_flight}
%{_libdir}/cmake/arrow/Findc-aresAlt.cmake
%endif
+%{_libdir}/cmake/arrow/Findjemalloc.cmake
%{_libdir}/cmake/arrow/Findlz4Alt.cmake
%if %{have_re2}
%{_libdir}/cmake/arrow/Findre2Alt.cmake
diff --git a/dev/tasks/linux-packages/apache-arrow/yum/centos-7/Dockerfile
b/dev/tasks/linux-packages/apache-arrow/yum/centos-7/Dockerfile
index 9c93e2f240..93d4a7812b 100644
--- a/dev/tasks/linux-packages/apache-arrow/yum/centos-7/Dockerfile
+++ b/dev/tasks/linux-packages/apache-arrow/yum/centos-7/Dockerfile
@@ -39,6 +39,7 @@ RUN \
glog-devel \
gobject-introspection-devel \
gtk-doc \
+ jemalloc-devel \
json-devel \
libzstd-devel \
lz4-devel \
diff --git
a/dev/tasks/linux-packages/apache-arrow/yum/centos-8-stream/Dockerfile
b/dev/tasks/linux-packages/apache-arrow/yum/centos-8-stream/Dockerfile
index b29cc4565b..ca9e44d409 100644
--- a/dev/tasks/linux-packages/apache-arrow/yum/centos-8-stream/Dockerfile
+++ b/dev/tasks/linux-packages/apache-arrow/yum/centos-8-stream/Dockerfile
@@ -40,6 +40,7 @@ RUN \
glog-devel \
gobject-introspection-devel \
gtk-doc \
+ jemalloc-devel \
json-devel \
libarchive \
libzstd-devel \
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index c8b8ae0c71..3a4eff43ab 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -206,6 +206,7 @@ tasks:
flags: >-
-e ARROW_CONAN_PARQUET=True
-e ARROW_CONAN_WITH_LZ4=True
+ -e ARROW_CONAN_WITH_JEMALLOC=True
image: conan
########################### Python Minimal ############################