This is an automated email from the ASF dual-hosted git repository.
uwe pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new ed8e566 ARROW-2808: [Python] Add MemoryPool tests
ed8e566 is described below
commit ed8e5660818d7b097f1e4be16354283d55262668
Author: Antoine Pitrou <[email protected]>
AuthorDate: Wed Oct 10 13:26:56 2018 +0200
ARROW-2808: [Python] Add MemoryPool tests
Also expand the exposed API a bit.
Author: Antoine Pitrou <[email protected]>
Closes #2725 from pitrou/ARROW-2808-python-memory-pool-tests and squashes
the following commits:
7507891d <Antoine Pitrou> Re-use common constructor test
463e52d4 <Antoine Pitrou> ARROW-2808: Add MemoryPool tests
---
python/pyarrow/__init__.py | 7 +--
python/pyarrow/includes/libarrow.pxd | 1 +
python/pyarrow/memory.pxi | 53 +++++++++++++++++-----
python/pyarrow/tests/test_io.py | 12 -----
python/pyarrow/tests/test_memory.py | 85 ++++++++++++++++++++++++++++++++++++
python/pyarrow/tests/test_misc.py | 5 ++-
6 files changed, 135 insertions(+), 28 deletions(-)
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index 12d78c8..54ee3a0 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -96,9 +96,10 @@ from pyarrow.lib import (null, bool_,
from pyarrow.lib import (Buffer, ResizableBuffer, foreign_buffer, py_buffer,
compress, decompress, allocate_buffer)
-from pyarrow.lib import (MemoryPool, ProxyMemoryPool, total_allocated_bytes,
- set_memory_pool, default_memory_pool,
- log_memory_allocations)
+from pyarrow.lib import (MemoryPool, LoggingMemoryPool, ProxyMemoryPool,
+ total_allocated_bytes, set_memory_pool,
+ default_memory_pool, logging_memory_pool,
+ proxy_memory_pool, log_memory_allocations)
from pyarrow.lib import (HdfsFile, NativeFile, PythonFile,
FixedSizeBufferWriter,
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 94aa41d..e4090b2 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -187,6 +187,7 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
cdef cppclass CMemoryPool" arrow::MemoryPool":
int64_t bytes_allocated()
+ int64_t max_memory()
cdef cppclass CLoggingMemoryPool" arrow::LoggingMemoryPool"(CMemoryPool):
CLoggingMemoryPool(CMemoryPool*)
diff --git a/python/pyarrow/memory.pxi b/python/pyarrow/memory.pxi
index e99955c..7fa6d79 100644
--- a/python/pyarrow/memory.pxi
+++ b/python/pyarrow/memory.pxi
@@ -23,15 +23,31 @@
cdef class MemoryPool:
def __init__(self):
- raise TypeError("Do not call {}'s constructor directly"
+ raise TypeError("Do not call {}'s constructor directly, "
+ "use pyarrow.*_memory_pool instead."
.format(self.__class__.__name__))
cdef void init(self, CMemoryPool* pool):
self.pool = pool
def bytes_allocated(self):
+ """
+ Return the number of bytes that are currently allocated from this
+ memory pool.
+ """
return self.pool.bytes_allocated()
+ def max_memory(self):
+ """
+ Return the peak memory allocation in this memory pool.
+ This can be an approximate number in multi-threaded applications.
+
+ None is returned if the pool implementation doesn't know how to
+ compute this number.
+ """
+ ret = self.pool.max_memory()
+ return ret if ret >= 0 else None
+
cdef CMemoryPool* maybe_unbox_memory_pool(MemoryPool memory_pool):
if memory_pool is None:
@@ -45,13 +61,10 @@ cdef class LoggingMemoryPool(MemoryPool):
unique_ptr[CLoggingMemoryPool] logging_pool
def __init__(self):
- raise TypeError("Do not call {}'s constructor directly"
+ raise TypeError("Do not call {}'s constructor directly, "
+ "use pyarrow.logging_memory_pool instead."
.format(self.__class__.__name__))
- def __cinit__(self, MemoryPool pool):
- self.logging_pool.reset(new CLoggingMemoryPool(pool.pool))
- self.init(self.logging_pool.get())
-
cdef class ProxyMemoryPool(MemoryPool):
"""
@@ -62,8 +75,8 @@ cdef class ProxyMemoryPool(MemoryPool):
unique_ptr[CProxyMemoryPool] proxy_pool
def __init__(self):
- raise TypeError("Do not call {}'s constructor directly. "
- "Use pyarrow.proxy_memory_pool instead."
+ raise TypeError("Do not call {}'s constructor directly, "
+ "use pyarrow.proxy_memory_pool instead."
.format(self.__class__.__name__))
@@ -76,8 +89,8 @@ def default_memory_pool():
def proxy_memory_pool(MemoryPool parent):
"""
- Derived MemoryPool class that tracks the number of bytes and
- maximum memory allocated through its direct calls.
+ Create and return a MemoryPool instance that redirects to the
+ *parent*, but with separate allocation statistics.
"""
cdef ProxyMemoryPool out = ProxyMemoryPool.__new__(ProxyMemoryPool)
out.proxy_pool.reset(new CProxyMemoryPool(parent.pool))
@@ -85,13 +98,25 @@ def proxy_memory_pool(MemoryPool parent):
return out
+def logging_memory_pool(MemoryPool parent):
+ """
+ Create and return a MemoryPool instance that redirects to the
+ *parent*, but also dumps allocation logs on stderr.
+ """
+ cdef LoggingMemoryPool out = LoggingMemoryPool.__new__(
+ LoggingMemoryPool, parent)
+ out.logging_pool.reset(new CLoggingMemoryPool(parent.pool))
+ out.init(out.logging_pool.get())
+ return out
+
+
def set_memory_pool(MemoryPool pool):
c_set_default_memory_pool(pool.pool)
cdef MemoryPool _default_memory_pool = default_memory_pool()
-cdef LoggingMemoryPool _logging_memory_pool = LoggingMemoryPool.__new__(
- LoggingMemoryPool, _default_memory_pool)
+cdef LoggingMemoryPool _logging_memory_pool = logging_memory_pool(
+ _default_memory_pool)
def log_memory_allocations(enable=True):
@@ -110,5 +135,9 @@ def log_memory_allocations(enable=True):
def total_allocated_bytes():
+ """
+ Return the currently allocated bytes from the default memory pool.
+ Other memory pools may not be accounted for.
+ """
cdef CMemoryPool* pool = c_get_memory_pool()
return pool.bytes_allocated()
diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py
index f5e5339..4908250 100644
--- a/python/pyarrow/tests/test_io.py
+++ b/python/pyarrow/tests/test_io.py
@@ -257,18 +257,6 @@ def test_python_file_closing():
# ----------------------------------------------------------------------
-# MemoryPool
-
-
-def test_memory_pool_cannot_use_ctor():
- with pytest.raises(TypeError):
- pa.MemoryPool()
-
- with pytest.raises(TypeError):
- pa.ProxyMemoryPool()
-
-
-# ----------------------------------------------------------------------
# Buffers
diff --git a/python/pyarrow/tests/test_memory.py b/python/pyarrow/tests/test_memory.py
new file mode 100644
index 0000000..14b8703
--- /dev/null
+++ b/python/pyarrow/tests/test_memory.py
@@ -0,0 +1,85 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import contextlib
+
+import pyarrow as pa
+
+
[email protected]
+def allocate_bytes(pool, nbytes):
+ """
+ Temporarily allocate *nbytes* from the given *pool*.
+ """
+ arr = pa.array([b"x" * nbytes], type=pa.binary(), memory_pool=pool)
+ # Fetch the values buffer from the varbinary array and release the rest,
+ # to get the desired allocation amount
+ buf = arr.buffers()[2]
+ arr = None
+ assert len(buf) == nbytes
+ try:
+ yield
+ finally:
+ buf = None
+
+
+def check_allocated_bytes(pool):
+ """
+ Check allocation stats on *pool*.
+ """
+ allocated_before = pool.bytes_allocated()
+ max_mem_before = pool.max_memory()
+ with allocate_bytes(pool, 512):
+ assert pool.bytes_allocated() == allocated_before + 512
+ new_max_memory = pool.max_memory()
+ assert pool.max_memory() >= max_mem_before
+ assert pool.bytes_allocated() == allocated_before
+ assert pool.max_memory() == new_max_memory
+
+
+def test_default_allocated_bytes():
+ pool = pa.default_memory_pool()
+ with allocate_bytes(pool, 1024):
+ check_allocated_bytes(pool)
+ assert pool.bytes_allocated() == pa.total_allocated_bytes()
+
+
+def test_proxy_memory_pool():
+ pool = pa.proxy_memory_pool(pa.default_memory_pool())
+ check_allocated_bytes(pool)
+
+
+def test_logging_memory_pool(capfd):
+ pool = pa.logging_memory_pool(pa.default_memory_pool())
+ check_allocated_bytes(pool)
+ out, err = capfd.readouterr()
+ assert err == ""
+ assert out.count("Allocate:") > 0
+ assert out.count("Allocate:") == out.count("Free:")
+
+
+def test_set_memory_pool():
+ old_pool = pa.default_memory_pool()
+ pool = pa.proxy_memory_pool(old_pool)
+ pa.set_memory_pool(pool)
+ try:
+ allocated_before = pool.bytes_allocated()
+ with allocate_bytes(None, 512):
+ assert pool.bytes_allocated() == allocated_before + 512
+ assert pool.bytes_allocated() == allocated_before
+ finally:
+ pa.set_memory_pool(old_pool)
diff --git a/python/pyarrow/tests/test_misc.py b/python/pyarrow/tests/test_misc.py
index 58d5f7d..1c384f3 100644
--- a/python/pyarrow/tests/test_misc.py
+++ b/python/pyarrow/tests/test_misc.py
@@ -117,7 +117,10 @@ def test_cpu_count():
pa.StructValue,
pa.DictionaryValue,
pa.ipc.Message,
- pa.ipc.MessageReader
+ pa.ipc.MessageReader,
+ pa.MemoryPool,
+ pa.LoggingMemoryPool,
+ pa.ProxyMemoryPool,
])
def test_extension_type_constructor_errors(klass):
# ARROW-2638: prevent calling extension class constructors directly