This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new a946214b12 GH-39973: [C++][CI] Disable debug memory pool for ASAN and 
Valgrind (#39975)
a946214b12 is described below

commit a946214b127ff50ea0cf7e68946c186fa66009a2
Author: Rossi Sun <zanmato1...@gmail.com>
AuthorDate: Fri Feb 9 01:02:04 2024 +0800

    GH-39973: [C++][CI] Disable debug memory pool for ASAN and Valgrind (#39975)
    
    
    
    ### Rationale for this change
    
    Disable debug memory pool for ASAN and Valgrind so that they can detect 
more subtle memory issues regarding to buffer tail bytes.
    
    ### What changes are included in this PR?
    
    1. Add a `none` option to debug memory pool env var to make other things 
slightly easier.
    2. Change `*_test.sh` scripts to conditionally set debug memory pool env 
var.
    3. Top-level docker compose change to pass none to debug memory pool env 
var for ASAN and Valgrind.
    
    ### Are these changes tested?
    
    The CI should cover it well.
    
    ### Are there any user-facing changes?
    
    No.
    
    * Closes: #39973
    
    Authored-by: Ruoxi Sun <zanmato1...@gmail.com>
    Signed-off-by: Antoine Pitrou <anto...@python.org>
---
 ci/appveyor-cpp-build.bat           |  5 ++++-
 ci/scripts/c_glib_test.sh           |  6 ++++--
 ci/scripts/cpp_test.sh              |  6 ++++--
 ci/scripts/python_test.sh           |  6 ++++--
 ci/scripts/r_test.sh                |  6 ++++--
 ci/scripts/ruby_test.sh             |  6 ++++--
 cpp/src/arrow/memory_pool.cc        |  4 ++--
 docker-compose.yml                  |  4 ++++
 docs/source/cpp/env_vars.rst        |  4 +++-
 python/pyarrow/tests/test_memory.py | 30 ++++++++++++++++++++++++++----
 10 files changed, 59 insertions(+), 18 deletions(-)

diff --git a/ci/appveyor-cpp-build.bat b/ci/appveyor-cpp-build.bat
index 5e561a0461..ab85032fe9 100644
--- a/ci/appveyor-cpp-build.bat
+++ b/ci/appveyor-cpp-build.bat
@@ -26,7 +26,10 @@ git submodule update --init || exit /B
 set ARROW_TEST_DATA=%CD%\testing\data
 set PARQUET_TEST_DATA=%CD%\cpp\submodules\parquet-testing\data
 
-set ARROW_DEBUG_MEMORY_POOL=trap
+@rem Enable memory debug checks if the env is not set already
+IF "%ARROW_DEBUG_MEMORY_POOL%"=="" (
+  set ARROW_DEBUG_MEMORY_POOL=trap
+)
 
 set CMAKE_BUILD_PARALLEL_LEVEL=%NUMBER_OF_PROCESSORS%
 set CTEST_PARALLEL_LEVEL=%NUMBER_OF_PROCESSORS%
diff --git a/ci/scripts/c_glib_test.sh b/ci/scripts/c_glib_test.sh
index cea600191a..f8083c7759 100755
--- a/ci/scripts/c_glib_test.sh
+++ b/ci/scripts/c_glib_test.sh
@@ -28,8 +28,10 @@ export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH}
 export PKG_CONFIG_PATH=${ARROW_HOME}/lib/pkgconfig
 export GI_TYPELIB_PATH=${ARROW_HOME}/lib/girepository-1.0
 
-# Enable memory debug checks.
-export ARROW_DEBUG_MEMORY_POOL=trap
+# Enable memory debug checks if the env is not set already
+if [ -z "${ARROW_DEBUG_MEMORY_POOL}" ]; then
+  export ARROW_DEBUG_MEMORY_POOL=trap
+fi
 
 pushd ${source_dir}
 
diff --git a/ci/scripts/cpp_test.sh b/ci/scripts/cpp_test.sh
index 0c6e1c6ef7..1d685c51a9 100755
--- a/ci/scripts/cpp_test.sh
+++ b/ci/scripts/cpp_test.sh
@@ -37,8 +37,10 @@ export 
LD_LIBRARY_PATH=${ARROW_HOME}/${CMAKE_INSTALL_LIBDIR:-lib}:${LD_LIBRARY_P
 # to retrieve metadata. Disable this so that S3FileSystem tests run faster.
 export AWS_EC2_METADATA_DISABLED=TRUE
 
-# Enable memory debug checks.
-export ARROW_DEBUG_MEMORY_POOL=trap
+# Enable memory debug checks if the env is not set already
+if [ -z "${ARROW_DEBUG_MEMORY_POOL}" ]; then
+  export ARROW_DEBUG_MEMORY_POOL=trap
+fi
 
 ctest_options=()
 case "$(uname)" in
diff --git a/ci/scripts/python_test.sh b/ci/scripts/python_test.sh
index 341c2dd057..8dfedb2880 100755
--- a/ci/scripts/python_test.sh
+++ b/ci/scripts/python_test.sh
@@ -32,8 +32,10 @@ export ARROW_GDB_SCRIPT=${arrow_dir}/cpp/gdb_arrow.py
 # Enable some checks inside Python itself
 export PYTHONDEVMODE=1
 
-# Enable memory debug checks.
-export ARROW_DEBUG_MEMORY_POOL=trap
+# Enable memory debug checks if the env is not set already
+if [ -z "${ARROW_DEBUG_MEMORY_POOL}" ]; then
+  export ARROW_DEBUG_MEMORY_POOL=trap
+fi
 
 # By default, force-test all optional components
 : ${PYARROW_TEST_ACERO:=${ARROW_ACERO:-ON}}
diff --git a/ci/scripts/r_test.sh b/ci/scripts/r_test.sh
index 22ec551edb..72078ab3c0 100755
--- a/ci/scripts/r_test.sh
+++ b/ci/scripts/r_test.sh
@@ -72,8 +72,10 @@ export _R_CHECK_STOP_ON_INVALID_NUMERIC_VERSION_INPUTS_=TRUE
 # to retrieve metadata. Disable this so that S3FileSystem tests run faster.
 export AWS_EC2_METADATA_DISABLED=TRUE
 
-# Enable memory debug checks.
-export ARROW_DEBUG_MEMORY_POOL=trap
+# Enable memory debug checks if the env is not set already
+if [ -z "${ARROW_DEBUG_MEMORY_POOL}" ]; then
+  export ARROW_DEBUG_MEMORY_POOL=trap
+fi
 
 # Hack so that texlive2020 doesn't pollute the home dir
 export TEXMFCONFIG=/tmp/texmf-config
diff --git a/ci/scripts/ruby_test.sh b/ci/scripts/ruby_test.sh
index 4fd6a85fe3..56c33a4d63 100755
--- a/ci/scripts/ruby_test.sh
+++ b/ci/scripts/ruby_test.sh
@@ -26,7 +26,9 @@ export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH}
 export PKG_CONFIG_PATH=${ARROW_HOME}/lib/pkgconfig
 export GI_TYPELIB_PATH=${ARROW_HOME}/lib/girepository-1.0
 
-# Enable memory debug checks.
-export ARROW_DEBUG_MEMORY_POOL=trap
+# Enable memory debug checks if the env is not set already
+if [ -z "${ARROW_DEBUG_MEMORY_POOL}" ]; then
+  export ARROW_DEBUG_MEMORY_POOL=trap
+fi
 
 rake -f ${source_dir}/Rakefile BUILD_DIR=${build_dir} USE_BUNDLER=yes
diff --git a/cpp/src/arrow/memory_pool.cc b/cpp/src/arrow/memory_pool.cc
index 843329c17b..d58c203d2a 100644
--- a/cpp/src/arrow/memory_pool.cc
+++ b/cpp/src/arrow/memory_pool.cc
@@ -195,7 +195,7 @@ bool IsDebugEnabled() {
       return false;
     }
     auto env_value = *std::move(maybe_env_value);
-    if (env_value.empty()) {
+    if (env_value.empty() || env_value == "none") {
       return false;
     }
     auto debug_state = DebugState::Instance();
@@ -212,7 +212,7 @@ bool IsDebugEnabled() {
       return true;
     }
     ARROW_LOG(WARNING) << "Invalid value for " << kDebugMemoryEnvVar << ": '" 
<< env_value
-                       << "'. Valid values are 'abort', 'trap', 'warn'.";
+                       << "'. Valid values are 'abort', 'trap', 'warn', 
'none'.";
     return false;
   }();
 
diff --git a/docker-compose.yml b/docker-compose.yml
index a31fa0d9aa..7ae625a017 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -320,6 +320,8 @@ services:
       # Shrink test runtime by enabling minimal optimizations
       ARROW_C_FLAGS_DEBUG: "-g1 -Og"
       ARROW_CXX_FLAGS_DEBUG: "-g1 -Og"
+      # GH-39973: Do not use debug memory pool for valgrind
+      ARROW_DEBUG_MEMORY_POOL: "none"
       ARROW_ENABLE_TIMING_TESTS:  # inherit
       ARROW_FLIGHT: "OFF"
       ARROW_FLIGHT_SQL: "OFF"
@@ -598,6 +600,8 @@ services:
       CXX: clang++-${CLANG_TOOLS}
       # Avoid creating huge static libraries
       ARROW_BUILD_STATIC: "OFF"
+      # GH-39973: Do not use debug memory pool for ASAN
+      ARROW_DEBUG_MEMORY_POOL: "none"
       ARROW_ENABLE_TIMING_TESTS:  # inherit
       # GH-33920: Disable Flight SQL to reduce build time.
       # We'll be able to re-enable this with Ubuntu 24.04 because
diff --git a/docs/source/cpp/env_vars.rst b/docs/source/cpp/env_vars.rst
index 0fa80aa110..eb7c797df5 100644
--- a/docs/source/cpp/env_vars.rst
+++ b/docs/source/cpp/env_vars.rst
@@ -58,8 +58,10 @@ that changing their value later will have an effect.
    - ``abort`` exits the processus with a non-zero return value;
    - ``trap`` issues a platform-specific debugger breakpoint / trap 
instruction;
    - ``warn`` prints a warning on stderr and continues execution;
+   - ``none`` disables memory checks;
 
-   If this variable is not set, or has empty an value, memory checks are 
disabled.
+   If this variable is not set, or has an empty value, it has the same effect
+   as the value ``none`` - memory checks are disabled.
 
    .. note::
       While this functionality can be useful and has little overhead, it
diff --git a/python/pyarrow/tests/test_memory.py 
b/python/pyarrow/tests/test_memory.py
index d9fdeb152c..4f19995234 100644
--- a/python/pyarrow/tests/test_memory.py
+++ b/python/pyarrow/tests/test_memory.py
@@ -243,13 +243,35 @@ def test_debug_memory_pool_warn(pool_factory):
     assert "Wrong size on deallocation" in res.stderr
 
 
-@pytest.mark.parametrize('pool_factory', supported_factories())
-def test_debug_memory_pool_disabled(pool_factory):
-    res = run_debug_memory_pool(pool_factory.__name__, "")
+def check_debug_memory_pool_disabled(pool_factory, env_value, msg):
+    res = run_debug_memory_pool(pool_factory.__name__, env_value)
     # The subprocess either returned successfully or was killed by a signal
     # (due to writing out of bounds), depending on the underlying allocator.
     if os.name == "posix":
         assert res.returncode <= 0
     else:
         res.check_returncode()
-    assert res.stderr == ""
+    if msg == "":
+        assert res.stderr == ""
+    else:
+        assert msg in res.stderr
+
+
+@pytest.mark.parametrize('pool_factory', supported_factories())
+def test_debug_memory_pool_none(pool_factory):
+    check_debug_memory_pool_disabled(pool_factory, "none", "")
+
+
+@pytest.mark.parametrize('pool_factory', supported_factories())
+def test_debug_memory_pool_empty(pool_factory):
+    check_debug_memory_pool_disabled(pool_factory, "", "")
+
+
+@pytest.mark.parametrize('pool_factory', supported_factories())
+def test_debug_memory_pool_unknown(pool_factory):
+    env_value = "some_arbitrary_value"
+    msg = (
+        f"Invalid value for ARROW_DEBUG_MEMORY_POOL: '{env_value}'. "
+        "Valid values are 'abort', 'trap', 'warn', 'none'."
+    )
+    check_debug_memory_pool_disabled(pool_factory, env_value, msg)

Reply via email to