This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new f9a6edac9f GH-45006: [CI][Python] Fix test_memory failures (#45007)
f9a6edac9f is described below

commit f9a6edac9f175de3ad993887470dd1dff4f151c1
Author: Antoine Pitrou <[email protected]>
AuthorDate: Fri Dec 13 16:36:02 2024 +0100

    GH-45006: [CI][Python] Fix test_memory failures (#45007)
    
    `test_memory.py` has started failing on some builds after https://github.com/apache/arrow/pull/44951 was merged
    
    * GitHub Issue: #45006
    
    Lead-authored-by: Antoine Pitrou <[email protected]>
    Co-authored-by: Antoine Pitrou <[email protected]>
    Signed-off-by: Antoine Pitrou <[email protected]>
---
 ci/docker/conda-cpp.dockerfile        |  3 ++-
 ci/docker/debian-12-cpp.dockerfile    |  1 +
 ci/docker/fedora-39-cpp.dockerfile    |  1 +
 ci/docker/ubuntu-20.04-cpp.dockerfile |  1 +
 ci/docker/ubuntu-22.04-cpp.dockerfile |  1 +
 ci/docker/ubuntu-24.04-cpp.dockerfile |  1 +
 ci/scripts/cpp_build.sh               |  4 ++--
 docker-compose.yml                    |  2 ++
 python/pyarrow/tests/test_memory.py   | 44 +++++++++++++++--------------------
 9 files changed, 30 insertions(+), 28 deletions(-)

diff --git a/ci/docker/conda-cpp.dockerfile b/ci/docker/conda-cpp.dockerfile
index f0084894e1..6d4be52bae 100644
--- a/ci/docker/conda-cpp.dockerfile
+++ b/ci/docker/conda-cpp.dockerfile
@@ -48,7 +48,7 @@ ENV PIPX_BASE_PYTHON=/opt/conda/bin/python3
 COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts
 RUN /arrow/ci/scripts/install_gcs_testbench.sh default
 
-# Ensure npm, node and azurite are on path. npm and node are required to install azurite, which will then need to 
+# Ensure npm, node and azurite are on path. npm and node are required to install azurite, which will then need to
 # be on the path for the tests to run.
 ENV PATH=/opt/conda/envs/arrow/bin:$PATH
 
@@ -68,6 +68,7 @@ ENV ARROW_ACERO=ON \
     ARROW_GANDIVA=ON \
     ARROW_GCS=ON \
     ARROW_HOME=$CONDA_PREFIX \
+    ARROW_JEMALLOC=ON \
     ARROW_ORC=ON \
     ARROW_PARQUET=ON \
     ARROW_S3=ON \
diff --git a/ci/docker/debian-12-cpp.dockerfile b/ci/docker/debian-12-cpp.dockerfile
index 354e7829cc..f486d07ff8 100644
--- a/ci/docker/debian-12-cpp.dockerfile
+++ b/ci/docker/debian-12-cpp.dockerfile
@@ -124,6 +124,7 @@ ENV ARROW_ACERO=ON \
     ARROW_GANDIVA=ON \
     ARROW_GCS=ON \
     ARROW_HOME=/usr/local \
+    ARROW_JEMALLOC=ON \
     ARROW_ORC=ON \
     ARROW_PARQUET=ON \
     ARROW_S3=ON \
diff --git a/ci/docker/fedora-39-cpp.dockerfile b/ci/docker/fedora-39-cpp.dockerfile
index 52e879aba4..6c5edd444e 100644
--- a/ci/docker/fedora-39-cpp.dockerfile
+++ b/ci/docker/fedora-39-cpp.dockerfile
@@ -87,6 +87,7 @@ ENV ARROW_ACERO=ON \
     ARROW_GANDIVA=ON \
     ARROW_GCS=ON \
     ARROW_HOME=/usr/local \
+    ARROW_JEMALLOC=ON \
     ARROW_ORC=ON \
     ARROW_PARQUET=ON \
     ARROW_S3=ON \
diff --git a/ci/docker/ubuntu-20.04-cpp.dockerfile b/ci/docker/ubuntu-20.04-cpp.dockerfile
index ec8c9840cf..8dc778d544 100644
--- a/ci/docker/ubuntu-20.04-cpp.dockerfile
+++ b/ci/docker/ubuntu-20.04-cpp.dockerfile
@@ -161,6 +161,7 @@ ENV absl_SOURCE=BUNDLED \
     ARROW_HDFS=ON \
     ARROW_HOME=/usr/local \
     ARROW_INSTALL_NAME_RPATH=OFF \
+    ARROW_JEMALLOC=ON \
     ARROW_ORC=ON \
     ARROW_PARQUET=ON \
     ARROW_S3=ON \
diff --git a/ci/docker/ubuntu-22.04-cpp.dockerfile b/ci/docker/ubuntu-22.04-cpp.dockerfile
index 78a44b0119..2e4d658bf9 100644
--- a/ci/docker/ubuntu-22.04-cpp.dockerfile
+++ b/ci/docker/ubuntu-22.04-cpp.dockerfile
@@ -205,6 +205,7 @@ ENV absl_SOURCE=BUNDLED \
     ARROW_HDFS=ON \
     ARROW_HOME=/usr/local \
     ARROW_INSTALL_NAME_RPATH=OFF \
+    ARROW_JEMALLOC=ON \
     ARROW_ORC=ON \
     ARROW_PARQUET=ON \
     ARROW_S3=ON \
diff --git a/ci/docker/ubuntu-24.04-cpp.dockerfile b/ci/docker/ubuntu-24.04-cpp.dockerfile
index 8cb7f9d5f6..53113bccfe 100644
--- a/ci/docker/ubuntu-24.04-cpp.dockerfile
+++ b/ci/docker/ubuntu-24.04-cpp.dockerfile
@@ -190,6 +190,7 @@ ENV ARROW_ACERO=ON \
     ARROW_HDFS=ON \
     ARROW_HOME=/usr/local \
     ARROW_INSTALL_NAME_RPATH=OFF \
+    ARROW_JEMALLOC=ON \
     ARROW_ORC=ON \
     ARROW_PARQUET=ON \
     ARROW_S3=ON \
diff --git a/ci/scripts/cpp_build.sh b/ci/scripts/cpp_build.sh
index e70f5da85a..c1e7adf6a0 100755
--- a/ci/scripts/cpp_build.sh
+++ b/ci/scripts/cpp_build.sh
@@ -171,10 +171,10 @@ else
     -DARROW_GCS=${ARROW_GCS:-OFF} \
     -DARROW_HDFS=${ARROW_HDFS:-ON} \
     -DARROW_INSTALL_NAME_RPATH=${ARROW_INSTALL_NAME_RPATH:-ON} \
-    -DARROW_JEMALLOC=${ARROW_JEMALLOC:-ON} \
+    -DARROW_JEMALLOC=${ARROW_JEMALLOC:-OFF} \
     -DARROW_JSON=${ARROW_JSON:-ON} \
     -DARROW_LARGE_MEMORY_TESTS=${ARROW_LARGE_MEMORY_TESTS:-OFF} \
-    -DARROW_MIMALLOC=${ARROW_MIMALLOC:-OFF} \
+    -DARROW_MIMALLOC=${ARROW_MIMALLOC:-ON} \
     -DARROW_ORC=${ARROW_ORC:-OFF} \
     -DARROW_PARQUET=${ARROW_PARQUET:-OFF} \
     -DARROW_RUNTIME_SIMD_LEVEL=${ARROW_RUNTIME_SIMD_LEVEL:-MAX} \
diff --git a/docker-compose.yml b/docker-compose.yml
index 4911a30752..7aabbb43b4 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -639,6 +639,7 @@ services:
       ARROW_FLIGHT_SQL: "OFF"
       ARROW_FUZZING: "ON"  # Check fuzz regressions
       ARROW_JEMALLOC: "OFF"
+      ARROW_MIMALLOC: "OFF"
       ARROW_ORC: "OFF"
       ARROW_S3: "OFF"
       ARROW_USE_ASAN: "ON"
@@ -677,6 +678,7 @@ services:
       ARROW_FLIGHT: "OFF"
       ARROW_FLIGHT_SQL: "OFF"
       ARROW_JEMALLOC: "OFF"
+      ARROW_MIMALLOC: "OFF"
       ARROW_ORC: "OFF"
       ARROW_USE_TSAN: "ON"
     command: *cpp-command
diff --git a/python/pyarrow/tests/test_memory.py b/python/pyarrow/tests/test_memory.py
index b1eef17666..6ed999db42 100644
--- a/python/pyarrow/tests/test_memory.py
+++ b/python/pyarrow/tests/test_memory.py
@@ -17,7 +17,6 @@
 
 import contextlib
 import os
-import platform
 import signal
 import subprocess
 import sys
@@ -30,15 +29,19 @@ import pytest
 pytestmark = pytest.mark.processes
 
 possible_backends = ["system", "jemalloc", "mimalloc"]
+# Backends which are expected to be present in all builds of PyArrow,
+# except if the user manually recompiled Arrow C++.
+mandatory_backends = ["system", "mimalloc"]
 
-should_have_jemalloc = (sys.platform == "linux" and platform.machine() == 'x86_64')
-should_have_mimalloc = sys.platform == "win32"
+
+def backend_factory(backend_name):
+    return getattr(pa, f"{backend_name}_memory_pool")
 
 
 def supported_factories():
     yield pa.default_memory_pool
-    for backend in pa.supported_memory_backends():
-        yield getattr(pa, f"{backend}_memory_pool")
+    for backend_name in pa.supported_memory_backends():
+        yield backend_factory(backend_name)
 
 
 @contextlib.contextmanager
@@ -149,17 +152,12 @@ def check_env_var(name, expected, *, expect_warning=False):
 
 
 def test_env_var():
-    check_env_var("system", ["system"])
-    if should_have_jemalloc:
-        check_env_var("jemalloc", ["jemalloc"])
-    if should_have_mimalloc:
-        check_env_var("mimalloc", ["mimalloc"])
+    for backend_name in mandatory_backends:
+        check_env_var(backend_name, [backend_name])
     check_env_var("nonexistent", possible_backends, expect_warning=True)
 
 
-def test_specific_memory_pools():
-    specific_pools = set()
-
+def test_memory_pool_factories():
     def check(factory, name, *, can_fail=False):
         if can_fail:
             try:
@@ -169,23 +167,16 @@ def test_specific_memory_pools():
         else:
             pool = factory()
         assert pool.backend_name == name
-        specific_pools.add(pool)
 
-    check(pa.system_memory_pool, "system")
-    check(pa.jemalloc_memory_pool, "jemalloc",
-          can_fail=not should_have_jemalloc)
-    check(pa.mimalloc_memory_pool, "mimalloc",
-          can_fail=not should_have_mimalloc)
+    for backend_name in possible_backends:
+        check(backend_factory(backend_name), backend_name,
+              can_fail=backend_name not in mandatory_backends)
 
 
 def test_supported_memory_backends():
     backends = pa.supported_memory_backends()
-
-    assert "system" in backends
-    if should_have_jemalloc:
-        assert "jemalloc" in backends
-    if should_have_mimalloc:
-        assert "mimalloc" in backends
+    assert set(backends) >= set(mandatory_backends)
+    assert set(backends) <= set(possible_backends)
 
 
 def run_debug_memory_pool(pool_factory, env_value):
@@ -246,6 +237,9 @@ def test_debug_memory_pool_warn(pool_factory):
 
 
 def check_debug_memory_pool_disabled(pool_factory, env_value, msg):
+    if sys.maxsize < 2**32:
+        # GH-45011: mimalloc may print warnings in this test on 32-bit Linux, ignore.
+        pytest.skip("Test may fail on 32-bit platforms")
     res = run_debug_memory_pool(pool_factory.__name__, env_value)
     # The subprocess either returned successfully or was killed by a signal
     # (due to writing out of bounds), depending on the underlying allocator.

Reply via email to