This is an automated email from the ASF dual-hosted git repository.

uwe pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new ed8e566  ARROW-2808: [Python] Add MemoryPool tests
ed8e566 is described below

commit ed8e5660818d7b097f1e4be16354283d55262668
Author: Antoine Pitrou <[email protected]>
AuthorDate: Wed Oct 10 13:26:56 2018 +0200

    ARROW-2808: [Python] Add MemoryPool tests
    
    Also expand the exposed API a bit.
    
    Author: Antoine Pitrou <[email protected]>
    
    Closes #2725 from pitrou/ARROW-2808-python-memory-pool-tests and squashes the following commits:
    
    7507891d <Antoine Pitrou> Re-use common constructor test
    463e52d4 <Antoine Pitrou> ARROW-2808:  Add MemoryPool tests
---
 python/pyarrow/__init__.py           |  7 +--
 python/pyarrow/includes/libarrow.pxd |  1 +
 python/pyarrow/memory.pxi            | 53 +++++++++++++++++-----
 python/pyarrow/tests/test_io.py      | 12 -----
 python/pyarrow/tests/test_memory.py  | 85 ++++++++++++++++++++++++++++++++++++
 python/pyarrow/tests/test_misc.py    |  5 ++-
 6 files changed, 135 insertions(+), 28 deletions(-)

diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index 12d78c8..54ee3a0 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -96,9 +96,10 @@ from pyarrow.lib import (null, bool_,
 from pyarrow.lib import (Buffer, ResizableBuffer, foreign_buffer, py_buffer,
                          compress, decompress, allocate_buffer)
 
-from pyarrow.lib import (MemoryPool, ProxyMemoryPool, total_allocated_bytes,
-                         set_memory_pool, default_memory_pool,
-                         log_memory_allocations)
+from pyarrow.lib import (MemoryPool, LoggingMemoryPool, ProxyMemoryPool,
+                         total_allocated_bytes, set_memory_pool,
+                         default_memory_pool, logging_memory_pool,
+                         proxy_memory_pool, log_memory_allocations)
 
 from pyarrow.lib import (HdfsFile, NativeFile, PythonFile,
                          FixedSizeBufferWriter,
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 94aa41d..e4090b2 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -187,6 +187,7 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
 
     cdef cppclass CMemoryPool" arrow::MemoryPool":
         int64_t bytes_allocated()
+        int64_t max_memory()
 
     cdef cppclass CLoggingMemoryPool" arrow::LoggingMemoryPool"(CMemoryPool):
         CLoggingMemoryPool(CMemoryPool*)
diff --git a/python/pyarrow/memory.pxi b/python/pyarrow/memory.pxi
index e99955c..7fa6d79 100644
--- a/python/pyarrow/memory.pxi
+++ b/python/pyarrow/memory.pxi
@@ -23,15 +23,31 @@
 cdef class MemoryPool:
 
     def __init__(self):
-        raise TypeError("Do not call {}'s constructor directly"
+        raise TypeError("Do not call {}'s constructor directly, "
+                        "use pyarrow.*_memory_pool instead."
                         .format(self.__class__.__name__))
 
     cdef void init(self, CMemoryPool* pool):
         self.pool = pool
 
     def bytes_allocated(self):
+        """
+        Return the number of bytes that are currently allocated from this
+        memory pool.
+        """
         return self.pool.bytes_allocated()
 
+    def max_memory(self):
+        """
+        Return the peak memory allocation in this memory pool.
+        This can be an approximate number in multi-threaded applications.
+
+        None is returned if the pool implementation doesn't know how to
+        compute this number.
+        """
+        ret = self.pool.max_memory()
+        return ret if ret >= 0 else None
+
 
 cdef CMemoryPool* maybe_unbox_memory_pool(MemoryPool memory_pool):
     if memory_pool is None:
@@ -45,13 +61,10 @@ cdef class LoggingMemoryPool(MemoryPool):
         unique_ptr[CLoggingMemoryPool] logging_pool
 
     def __init__(self):
-        raise TypeError("Do not call {}'s constructor directly"
+        raise TypeError("Do not call {}'s constructor directly, "
+                        "use pyarrow.logging_memory_pool instead."
                         .format(self.__class__.__name__))
 
-    def __cinit__(self, MemoryPool pool):
-        self.logging_pool.reset(new CLoggingMemoryPool(pool.pool))
-        self.init(self.logging_pool.get())
-
 
 cdef class ProxyMemoryPool(MemoryPool):
     """
@@ -62,8 +75,8 @@ cdef class ProxyMemoryPool(MemoryPool):
         unique_ptr[CProxyMemoryPool] proxy_pool
 
     def __init__(self):
-        raise TypeError("Do not call {}'s constructor directly. "
-                        "Use pyarrow.proxy_memory_pool instead."
+        raise TypeError("Do not call {}'s constructor directly, "
+                        "use pyarrow.proxy_memory_pool instead."
                         .format(self.__class__.__name__))
 
 
@@ -76,8 +89,8 @@ def default_memory_pool():
 
 def proxy_memory_pool(MemoryPool parent):
     """
-    Derived MemoryPool class that tracks the number of bytes and
-    maximum memory allocated through its direct calls.
+    Create and return a MemoryPool instance that redirects to the
+    *parent*, but with separate allocation statistics.
     """
     cdef ProxyMemoryPool out = ProxyMemoryPool.__new__(ProxyMemoryPool)
     out.proxy_pool.reset(new CProxyMemoryPool(parent.pool))
@@ -85,13 +98,25 @@ def proxy_memory_pool(MemoryPool parent):
     return out
 
 
+def logging_memory_pool(MemoryPool parent):
+    """
+    Create and return a MemoryPool instance that redirects to the
+    *parent*, but also dumps allocation logs on stderr.
+    """
+    cdef LoggingMemoryPool out = LoggingMemoryPool.__new__(
+        LoggingMemoryPool, parent)
+    out.logging_pool.reset(new CLoggingMemoryPool(parent.pool))
+    out.init(out.logging_pool.get())
+    return out
+
+
 def set_memory_pool(MemoryPool pool):
     c_set_default_memory_pool(pool.pool)
 
 
 cdef MemoryPool _default_memory_pool = default_memory_pool()
-cdef LoggingMemoryPool _logging_memory_pool = LoggingMemoryPool.__new__(
-    LoggingMemoryPool, _default_memory_pool)
+cdef LoggingMemoryPool _logging_memory_pool = logging_memory_pool(
+    _default_memory_pool)
 
 
 def log_memory_allocations(enable=True):
@@ -110,5 +135,9 @@ def log_memory_allocations(enable=True):
 
 
 def total_allocated_bytes():
+    """
+    Return the currently allocated bytes from the default memory pool.
+    Other memory pools may not be accounted for.
+    """
     cdef CMemoryPool* pool = c_get_memory_pool()
     return pool.bytes_allocated()
diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py
index f5e5339..4908250 100644
--- a/python/pyarrow/tests/test_io.py
+++ b/python/pyarrow/tests/test_io.py
@@ -257,18 +257,6 @@ def test_python_file_closing():
 
 
 # ----------------------------------------------------------------------
-# MemoryPool
-
-
-def test_memory_pool_cannot_use_ctor():
-    with pytest.raises(TypeError):
-        pa.MemoryPool()
-
-    with pytest.raises(TypeError):
-        pa.ProxyMemoryPool()
-
-
-# ----------------------------------------------------------------------
 # Buffers
 
 
diff --git a/python/pyarrow/tests/test_memory.py b/python/pyarrow/tests/test_memory.py
new file mode 100644
index 0000000..14b8703
--- /dev/null
+++ b/python/pyarrow/tests/test_memory.py
@@ -0,0 +1,85 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import contextlib
+
+import pyarrow as pa
+
+
+@contextlib.contextmanager
+def allocate_bytes(pool, nbytes):
+    """
+    Temporarily allocate *nbytes* from the given *pool*.
+    """
+    arr = pa.array([b"x" * nbytes], type=pa.binary(), memory_pool=pool)
+    # Fetch the values buffer from the varbinary array and release the rest,
+    # to get the desired allocation amount
+    buf = arr.buffers()[2]
+    arr = None
+    assert len(buf) == nbytes
+    try:
+        yield
+    finally:
+        buf = None
+
+
+def check_allocated_bytes(pool):
+    """
+    Check allocation stats on *pool*.
+    """
+    allocated_before = pool.bytes_allocated()
+    max_mem_before = pool.max_memory()
+    with allocate_bytes(pool, 512):
+        assert pool.bytes_allocated() == allocated_before + 512
+        new_max_memory = pool.max_memory()
+        assert pool.max_memory() >= max_mem_before
+    assert pool.bytes_allocated() == allocated_before
+    assert pool.max_memory() == new_max_memory
+
+
+def test_default_allocated_bytes():
+    pool = pa.default_memory_pool()
+    with allocate_bytes(pool, 1024):
+        check_allocated_bytes(pool)
+        assert pool.bytes_allocated() == pa.total_allocated_bytes()
+
+
+def test_proxy_memory_pool():
+    pool = pa.proxy_memory_pool(pa.default_memory_pool())
+    check_allocated_bytes(pool)
+
+
+def test_logging_memory_pool(capfd):
+    pool = pa.logging_memory_pool(pa.default_memory_pool())
+    check_allocated_bytes(pool)
+    out, err = capfd.readouterr()
+    assert err == ""
+    assert out.count("Allocate:") > 0
+    assert out.count("Allocate:") == out.count("Free:")
+
+
+def test_set_memory_pool():
+    old_pool = pa.default_memory_pool()
+    pool = pa.proxy_memory_pool(old_pool)
+    pa.set_memory_pool(pool)
+    try:
+        allocated_before = pool.bytes_allocated()
+        with allocate_bytes(None, 512):
+            assert pool.bytes_allocated() == allocated_before + 512
+        assert pool.bytes_allocated() == allocated_before
+    finally:
+        pa.set_memory_pool(old_pool)
diff --git a/python/pyarrow/tests/test_misc.py b/python/pyarrow/tests/test_misc.py
index 58d5f7d..1c384f3 100644
--- a/python/pyarrow/tests/test_misc.py
+++ b/python/pyarrow/tests/test_misc.py
@@ -117,7 +117,10 @@ def test_cpu_count():
     pa.StructValue,
     pa.DictionaryValue,
     pa.ipc.Message,
-    pa.ipc.MessageReader
+    pa.ipc.MessageReader,
+    pa.MemoryPool,
+    pa.LoggingMemoryPool,
+    pa.ProxyMemoryPool,
 ])
 def test_extension_type_constructor_errors(klass):
     # ARROW-2638: prevent calling extension class constructors directly

Reply via email to