This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new f9a6edac9f GH-45006: [CI][Python] Fix test_memory failures (#45007)
f9a6edac9f is described below
commit f9a6edac9f175de3ad993887470dd1dff4f151c1
Author: Antoine Pitrou <[email protected]>
AuthorDate: Fri Dec 13 16:36:02 2024 +0100
GH-45006: [CI][Python] Fix test_memory failures (#45007)
`test_memory.py` has started failing on some builds after
https://github.com/apache/arrow/pull/44951 was merged
* GitHub Issue: #45006
Lead-authored-by: Antoine Pitrou <[email protected]>
Co-authored-by: Antoine Pitrou <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
---
ci/docker/conda-cpp.dockerfile | 3 ++-
ci/docker/debian-12-cpp.dockerfile | 1 +
ci/docker/fedora-39-cpp.dockerfile | 1 +
ci/docker/ubuntu-20.04-cpp.dockerfile | 1 +
ci/docker/ubuntu-22.04-cpp.dockerfile | 1 +
ci/docker/ubuntu-24.04-cpp.dockerfile | 1 +
ci/scripts/cpp_build.sh | 4 ++--
docker-compose.yml | 2 ++
python/pyarrow/tests/test_memory.py | 44 +++++++++++++++--------------------
9 files changed, 30 insertions(+), 28 deletions(-)
diff --git a/ci/docker/conda-cpp.dockerfile b/ci/docker/conda-cpp.dockerfile
index f0084894e1..6d4be52bae 100644
--- a/ci/docker/conda-cpp.dockerfile
+++ b/ci/docker/conda-cpp.dockerfile
@@ -48,7 +48,7 @@ ENV PIPX_BASE_PYTHON=/opt/conda/bin/python3
COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts
RUN /arrow/ci/scripts/install_gcs_testbench.sh default
-# Ensure npm, node and azurite are on path. npm and node are required to install azurite, which will then need to
+# Ensure npm, node and azurite are on path. npm and node are required to install azurite, which will then need to
# be on the path for the tests to run.
ENV PATH=/opt/conda/envs/arrow/bin:$PATH
@@ -68,6 +68,7 @@ ENV ARROW_ACERO=ON \
ARROW_GANDIVA=ON \
ARROW_GCS=ON \
ARROW_HOME=$CONDA_PREFIX \
+ ARROW_JEMALLOC=ON \
ARROW_ORC=ON \
ARROW_PARQUET=ON \
ARROW_S3=ON \
diff --git a/ci/docker/debian-12-cpp.dockerfile b/ci/docker/debian-12-cpp.dockerfile
index 354e7829cc..f486d07ff8 100644
--- a/ci/docker/debian-12-cpp.dockerfile
+++ b/ci/docker/debian-12-cpp.dockerfile
@@ -124,6 +124,7 @@ ENV ARROW_ACERO=ON \
ARROW_GANDIVA=ON \
ARROW_GCS=ON \
ARROW_HOME=/usr/local \
+ ARROW_JEMALLOC=ON \
ARROW_ORC=ON \
ARROW_PARQUET=ON \
ARROW_S3=ON \
diff --git a/ci/docker/fedora-39-cpp.dockerfile b/ci/docker/fedora-39-cpp.dockerfile
index 52e879aba4..6c5edd444e 100644
--- a/ci/docker/fedora-39-cpp.dockerfile
+++ b/ci/docker/fedora-39-cpp.dockerfile
@@ -87,6 +87,7 @@ ENV ARROW_ACERO=ON \
ARROW_GANDIVA=ON \
ARROW_GCS=ON \
ARROW_HOME=/usr/local \
+ ARROW_JEMALLOC=ON \
ARROW_ORC=ON \
ARROW_PARQUET=ON \
ARROW_S3=ON \
diff --git a/ci/docker/ubuntu-20.04-cpp.dockerfile b/ci/docker/ubuntu-20.04-cpp.dockerfile
index ec8c9840cf..8dc778d544 100644
--- a/ci/docker/ubuntu-20.04-cpp.dockerfile
+++ b/ci/docker/ubuntu-20.04-cpp.dockerfile
@@ -161,6 +161,7 @@ ENV absl_SOURCE=BUNDLED \
ARROW_HDFS=ON \
ARROW_HOME=/usr/local \
ARROW_INSTALL_NAME_RPATH=OFF \
+ ARROW_JEMALLOC=ON \
ARROW_ORC=ON \
ARROW_PARQUET=ON \
ARROW_S3=ON \
diff --git a/ci/docker/ubuntu-22.04-cpp.dockerfile b/ci/docker/ubuntu-22.04-cpp.dockerfile
index 78a44b0119..2e4d658bf9 100644
--- a/ci/docker/ubuntu-22.04-cpp.dockerfile
+++ b/ci/docker/ubuntu-22.04-cpp.dockerfile
@@ -205,6 +205,7 @@ ENV absl_SOURCE=BUNDLED \
ARROW_HDFS=ON \
ARROW_HOME=/usr/local \
ARROW_INSTALL_NAME_RPATH=OFF \
+ ARROW_JEMALLOC=ON \
ARROW_ORC=ON \
ARROW_PARQUET=ON \
ARROW_S3=ON \
diff --git a/ci/docker/ubuntu-24.04-cpp.dockerfile b/ci/docker/ubuntu-24.04-cpp.dockerfile
index 8cb7f9d5f6..53113bccfe 100644
--- a/ci/docker/ubuntu-24.04-cpp.dockerfile
+++ b/ci/docker/ubuntu-24.04-cpp.dockerfile
@@ -190,6 +190,7 @@ ENV ARROW_ACERO=ON \
ARROW_HDFS=ON \
ARROW_HOME=/usr/local \
ARROW_INSTALL_NAME_RPATH=OFF \
+ ARROW_JEMALLOC=ON \
ARROW_ORC=ON \
ARROW_PARQUET=ON \
ARROW_S3=ON \
diff --git a/ci/scripts/cpp_build.sh b/ci/scripts/cpp_build.sh
index e70f5da85a..c1e7adf6a0 100755
--- a/ci/scripts/cpp_build.sh
+++ b/ci/scripts/cpp_build.sh
@@ -171,10 +171,10 @@ else
-DARROW_GCS=${ARROW_GCS:-OFF} \
-DARROW_HDFS=${ARROW_HDFS:-ON} \
-DARROW_INSTALL_NAME_RPATH=${ARROW_INSTALL_NAME_RPATH:-ON} \
- -DARROW_JEMALLOC=${ARROW_JEMALLOC:-ON} \
+ -DARROW_JEMALLOC=${ARROW_JEMALLOC:-OFF} \
-DARROW_JSON=${ARROW_JSON:-ON} \
-DARROW_LARGE_MEMORY_TESTS=${ARROW_LARGE_MEMORY_TESTS:-OFF} \
- -DARROW_MIMALLOC=${ARROW_MIMALLOC:-OFF} \
+ -DARROW_MIMALLOC=${ARROW_MIMALLOC:-ON} \
-DARROW_ORC=${ARROW_ORC:-OFF} \
-DARROW_PARQUET=${ARROW_PARQUET:-OFF} \
-DARROW_RUNTIME_SIMD_LEVEL=${ARROW_RUNTIME_SIMD_LEVEL:-MAX} \
diff --git a/docker-compose.yml b/docker-compose.yml
index 4911a30752..7aabbb43b4 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -639,6 +639,7 @@ services:
ARROW_FLIGHT_SQL: "OFF"
ARROW_FUZZING: "ON" # Check fuzz regressions
ARROW_JEMALLOC: "OFF"
+ ARROW_MIMALLOC: "OFF"
ARROW_ORC: "OFF"
ARROW_S3: "OFF"
ARROW_USE_ASAN: "ON"
@@ -677,6 +678,7 @@ services:
ARROW_FLIGHT: "OFF"
ARROW_FLIGHT_SQL: "OFF"
ARROW_JEMALLOC: "OFF"
+ ARROW_MIMALLOC: "OFF"
ARROW_ORC: "OFF"
ARROW_USE_TSAN: "ON"
command: *cpp-command
diff --git a/python/pyarrow/tests/test_memory.py b/python/pyarrow/tests/test_memory.py
index b1eef17666..6ed999db42 100644
--- a/python/pyarrow/tests/test_memory.py
+++ b/python/pyarrow/tests/test_memory.py
@@ -17,7 +17,6 @@
import contextlib
import os
-import platform
import signal
import subprocess
import sys
@@ -30,15 +29,19 @@ import pytest
pytestmark = pytest.mark.processes
possible_backends = ["system", "jemalloc", "mimalloc"]
+# Backends which are expected to be present in all builds of PyArrow,
+# except if the user manually recompiled Arrow C++.
+mandatory_backends = ["system", "mimalloc"]
-should_have_jemalloc = (sys.platform == "linux" and platform.machine() == 'x86_64')
-should_have_mimalloc = sys.platform == "win32"
+
+def backend_factory(backend_name):
+ return getattr(pa, f"{backend_name}_memory_pool")
def supported_factories():
yield pa.default_memory_pool
- for backend in pa.supported_memory_backends():
- yield getattr(pa, f"{backend}_memory_pool")
+ for backend_name in pa.supported_memory_backends():
+ yield backend_factory(backend_name)
@contextlib.contextmanager
@@ -149,17 +152,12 @@ def check_env_var(name, expected, *, expect_warning=False):
def test_env_var():
- check_env_var("system", ["system"])
- if should_have_jemalloc:
- check_env_var("jemalloc", ["jemalloc"])
- if should_have_mimalloc:
- check_env_var("mimalloc", ["mimalloc"])
+ for backend_name in mandatory_backends:
+ check_env_var(backend_name, [backend_name])
check_env_var("nonexistent", possible_backends, expect_warning=True)
-def test_specific_memory_pools():
- specific_pools = set()
-
+def test_memory_pool_factories():
def check(factory, name, *, can_fail=False):
if can_fail:
try:
@@ -169,23 +167,16 @@ def test_specific_memory_pools():
else:
pool = factory()
assert pool.backend_name == name
- specific_pools.add(pool)
- check(pa.system_memory_pool, "system")
- check(pa.jemalloc_memory_pool, "jemalloc",
- can_fail=not should_have_jemalloc)
- check(pa.mimalloc_memory_pool, "mimalloc",
- can_fail=not should_have_mimalloc)
+ for backend_name in possible_backends:
+ check(backend_factory(backend_name), backend_name,
+ can_fail=backend_name not in mandatory_backends)
def test_supported_memory_backends():
backends = pa.supported_memory_backends()
-
- assert "system" in backends
- if should_have_jemalloc:
- assert "jemalloc" in backends
- if should_have_mimalloc:
- assert "mimalloc" in backends
+ assert set(backends) >= set(mandatory_backends)
+ assert set(backends) <= set(possible_backends)
def run_debug_memory_pool(pool_factory, env_value):
@@ -246,6 +237,9 @@ def test_debug_memory_pool_warn(pool_factory):
def check_debug_memory_pool_disabled(pool_factory, env_value, msg):
+    if sys.maxsize < 2**32:
+        # GH-45011: mimalloc may print warnings in this test on 32-bit Linux, ignore.
+        pytest.skip("Test may fail on 32-bit platforms")
res = run_debug_memory_pool(pool_factory.__name__, env_value)
# The subprocess either returned successfully or was killed by a signal
# (due to writing out of bounds), depending on the underlying allocator.