[Numpy-discussion] Should ndarray be a context manager?

2014-12-09 Thread Sturla Molden

I wonder if ndarray should be a context manager so we can write 
something like this:


with np.zeros(n) as x:
   [...]


The difference should be that __exit__ should free the memory in x (if 
owned by x) and make x a zero size array.

Unlike the current ndarray, which does not have an __exit__ method, this 
would give precise control over when the memory is freed. The timing of 
the memory release would not be dependent on the Python implementation, 
and a reference cycle or reference leak would not accidentally produce a 
memory leak. It would allow us to deterministically decide when the 
memory should be freed, which e.g. is useful when we work with large arrays.


A problem with this is that the memory in the ndarray would be volatile 
with respect to other Python threads and view arrays. However, there are 
dozens of other ways to produce segfaults or buffer overflows with NumPy 
(cf. stride_tricks or wrapping external buffers).


Below is a Cython class that does something similar, but we would need 
to e.g. write something like

 with Heapmem(n * np.double().itemsize) as hm:
 x = hm.doublearray
 [...]

instead of just

 with np.zeros(n) as x:
 [...]


Sturla


# (C) 2014 Sturla Molden

from cpython cimport PyMem_Malloc, PyMem_Free
from libc.string cimport memset
cimport numpy as cnp
cnp.init_array()


cdef class Heapmem:

 cdef:
 void *_pointer
 cnp.intp_t _size

 def __cinit__(Heapmem self, Py_ssize_t n):
 self._pointer = NULL
 self._size = <cnp.intp_t> n

 def __init__(Heapmem self, Py_ssize_t n):
 self.allocate()

 def allocate(Heapmem self):
 if self._pointer != NULL:
 raise RuntimeError("Memory already allocated")
 else:
 self._pointer = PyMem_Malloc(self._size)
 if (self._pointer == NULL):
 raise MemoryError()
 memset(self._pointer, 0, self._size)

 def __dealloc__(Heapmem self):
 if self._pointer != NULL:
 PyMem_Free(self._pointer)
 self._pointer = NULL

 property pointer:
 def __get__(Heapmem self):
 return <cnp.intp_t> self._pointer

 property doublearray:
 def __get__(Heapmem self):
 cdef cnp.intp_t n = self._size//sizeof(double)
 if self._pointer != NULL:
 return cnp.PyArray_SimpleNewFromData(1, n,
  cnp.NPY_DOUBLE, self._pointer)
 else:
 raise RuntimeError("Memory not allocated")

 property chararray:
 def __get__(Heapmem self):
 if self._pointer != NULL:
 return cnp.PyArray_SimpleNewFromData(1, self._size,
  cnp.NPY_CHAR, self._pointer)
 else:
 raise RuntimeError("Memory not allocated")

 def __enter__(self):
 if self._pointer != NULL:
 raise RuntimeError("Memory not allocated")

 def __exit__(Heapmem self, type, value, traceback):
 if self._pointer != NULL:
 PyMem_Free(self._pointer)
 self._pointer = NULL





___
NumPy-Discussion mailing list
NumPy-Discussion@scipy.org
http://mail.scipy.org/mailman/listinfo/numpy-discussion


Re: [Numpy-discussion] Should ndarray be a context manager?

2014-12-09 Thread Eelco Hoogendoorn
My impression is that this level of optimization does not, and should not, fall 
within the scope of numpy.

-Original Message-
From: Sturla Molden sturla.mol...@gmail.com
Sent: ‎9-‎12-‎2014 16:02
To: numpy-discussion@scipy.org numpy-discussion@scipy.org
Subject: [Numpy-discussion] Should ndarray be a context manager?


I wonder if ndarray should be a context manager so we can write 
something like this:


with np.zeros(n) as x:
   [...]


The difference should be that __exit__ should free the memory in x (if 
owned by x) and make x a zero size array.

Unlike the current ndarray, which does not have an __exit__ method, this 
would give precise control over when the memory is freed. The timing of 
the memory release would not be dependent on the Python implementation, 
and a reference cycle or reference leak would not accidentally produce a 
memory leak. It would allow us to deterministically decide when the 
memory should be freed, which e.g. is useful when we work with large arrays.


A problem with this is that the memory in the ndarray would be volatile 
with respect to other Python threads and view arrays. However, there are 
dozens of other ways to produce segfaults or buffer overflows with NumPy 
(cf. stride_tricks or wrapping external buffers).


Below is a Cython class that does something similar, but we would need 
to e.g. write something like

 with Heapmem(n * np.double().itemsize) as hm:
 x = hm.doublearray
 [...]

instead of just

 with np.zeros(n) as x:
 [...]


Sturla


# (C) 2014 Sturla Molden

from cpython cimport PyMem_Malloc, PyMem_Free
from libc.string cimport memset
cimport numpy as cnp
cnp.init_array()


cdef class Heapmem:

 cdef:
 void *_pointer
 cnp.intp_t _size

 def __cinit__(Heapmem self, Py_ssize_t n):
 self._pointer = NULL
 self._size = cnp.intp_t n

 def __init__(Heapmem self, Py_ssize_t n):
 self.allocate()

 def allocate(Heapmem self):
 if self._pointer != NULL:
 raise RuntimeError(Memory already allocated)
 else:
 self._pointer = PyMem_Malloc(self._size)
 if (self._pointer == NULL):
 raise MemoryError()
 memset(self._pointer, 0, self._size)

 def __dealloc__(Heapmem self):
 if self._pointer != NULL:
 PyMem_Free(self._pointer)
 self._pointer = NULL

 property pointer:
 def __get__(Heapmem self):
 return cnp.intp_t self._pointer

 property doublearray:
 def __get__(Heapmem self):
 cdef cnp.intp_t n = self._size//sizeof(double)
 if self._pointer != NULL:
 return cnp.PyArray_SimpleNewFromData(1, n,
  cnp.NPY_DOUBLE, self._pointer)
 else:
 raise RuntimeError(Memory not allocated)

 property chararray:
 def __get__(Heapmem self):
 if self._pointer != NULL:
 return cnp.PyArray_SimpleNewFromData(1, self._size,
  cnp.NPY_CHAR, self._pointer)
 else:
 raise RuntimeError(Memory not allocated)

 def __enter__(self):
 if self._pointer != NULL:
 raise RuntimeError(Memory not allocated)

 def __exit__(Heapmem self, type, value, traceback):
 if self._pointer != NULL:
 PyMem_Free(self._pointer)
 self._pointer = NULL





___
NumPy-Discussion mailing list
NumPy-Discussion@scipy.org
http://mail.scipy.org/mailman/listinfo/numpy-discussion
___
NumPy-Discussion mailing list
NumPy-Discussion@scipy.org
http://mail.scipy.org/mailman/listinfo/numpy-discussion


Re: [Numpy-discussion] help using np.correlate to produce correlograms.

2014-12-09 Thread Pierre Haessig
Hi,

Le 08/12/2014 22:02, Jose Guzman a écrit :
 I'm trying to compute the cross correlation and cross correlograms from
 some signals. For that, I'm testing  first np.correlate with some
 idealized traces (sine waves) that are exactly 1 ms  separated from each
 other. You can have a look here:

 http://nbviewer.ipython.org/github/JoseGuzman/myIPythonNotebooks/blob/master/Signal_Processing/Cross%20correlation.ipynb

 Unfortunately I am not able to retrieve the correct lag of 1 ms for the
 option 'full'. Strangely enough, if I perform an autocorrelation of any of
 the signals, I obtain the correct value for a lag = 0 ms. I think I'm
 doing something wrong to obtain the lags.
I looked at your Notebook and I believe that you had an error in the 
definition of the delay. In your first cell, you were creating a delay 
of 20ms instead of 1ms (and because the sine is periodic, this was not 
obvious).

In addition, to get a good estimation of the delay with cross 
correlation, you need many periods.

Here is a modification of your notebook : 
http://nbviewer.ipython.org/gist/pierre-haessig/e2dda384ae0e08943f9a
I've updated the delay definition and the number of periods.

Finally, you may be able to automate a bit your plot by using 
matplotlib's xcorr (which uses np.correlate)
http://matplotlib.org/api/pyplot_api.html#matplotlib.pyplot.xcorr

best,
Pierre
___
NumPy-Discussion mailing list
NumPy-Discussion@scipy.org
http://mail.scipy.org/mailman/listinfo/numpy-discussion


[Numpy-discussion] should unpackbits take a dtype?

2014-12-09 Thread Alan G Isaac
As the question asks:
should `unpackbits` add a dtype argument?

At the moment I'm interest in unpacking as a boolean array.

Alan Isaac
___
NumPy-Discussion mailing list
NumPy-Discussion@scipy.org
http://mail.scipy.org/mailman/listinfo/numpy-discussion


Re: [Numpy-discussion] Should ndarray be a context manager?

2014-12-09 Thread Julian Taylor
I don't think that makes much sense, context managers are useful for
managing the lifetime of objects owning resources not already managed by
the garbage collector.
E.g. file descriptors, a gc has no clue that a piece of memory contains
a descriptor and thus never has a reason to release it in time when
there is plenty of memory available.

Memory on the other hand is the resource a gc manages, so it should
release objects when memory pressure is high.

Also numpy only supports CPython so we don't even need to care about
that. A context manager will also not help you with reference cycles.

On 09.12.2014 16:01, Sturla Molden wrote:
 
 I wonder if ndarray should be a context manager so we can write 
 something like this:
 
 
 with np.zeros(n) as x:
[...]
 
 
 The difference should be that __exit__ should free the memory in x (if 
 owned by x) and make x a zero size array.
 
 Unlike the current ndarray, which does not have an __exit__ method, this 
 would give precise control over when the memory is freed. The timing of 
 the memory release would not be dependent on the Python implementation, 
 and a reference cycle or reference leak would not accidentally produce a 
 memory leak. It would allow us to deterministically decide when the 
 memory should be freed, which e.g. is useful when we work with large arrays.
 
 
 A problem with this is that the memory in the ndarray would be volatile 
 with respect to other Python threads and view arrays. However, there are 
 dozens of other ways to produce segfaults or buffer overflows with NumPy 
 (cf. stride_tricks or wrapping external buffers).
 
 
 Below is a Cython class that does something similar, but we would need 
 to e.g. write something like
 
  with Heapmem(n * np.double().itemsize) as hm:
  x = hm.doublearray
  [...]
 
 instead of just
 
  with np.zeros(n) as x:
  [...]
 
 
 Sturla
 
 
 # (C) 2014 Sturla Molden
 
 from cpython cimport PyMem_Malloc, PyMem_Free
 from libc.string cimport memset
 cimport numpy as cnp
 cnp.init_array()
 
 
 cdef class Heapmem:
 
  cdef:
  void *_pointer
  cnp.intp_t _size
 
  def __cinit__(Heapmem self, Py_ssize_t n):
  self._pointer = NULL
  self._size = cnp.intp_t n
 
  def __init__(Heapmem self, Py_ssize_t n):
  self.allocate()
 
  def allocate(Heapmem self):
  if self._pointer != NULL:
  raise RuntimeError(Memory already allocated)
  else:
  self._pointer = PyMem_Malloc(self._size)
  if (self._pointer == NULL):
  raise MemoryError()
  memset(self._pointer, 0, self._size)
 
  def __dealloc__(Heapmem self):
  if self._pointer != NULL:
  PyMem_Free(self._pointer)
  self._pointer = NULL
 
  property pointer:
  def __get__(Heapmem self):
  return cnp.intp_t self._pointer
 
  property doublearray:
  def __get__(Heapmem self):
  cdef cnp.intp_t n = self._size//sizeof(double)
  if self._pointer != NULL:
  return cnp.PyArray_SimpleNewFromData(1, n,
   cnp.NPY_DOUBLE, self._pointer)
  else:
  raise RuntimeError(Memory not allocated)
 
  property chararray:
  def __get__(Heapmem self):
  if self._pointer != NULL:
  return cnp.PyArray_SimpleNewFromData(1, self._size,
   cnp.NPY_CHAR, self._pointer)
  else:
  raise RuntimeError(Memory not allocated)
 
  def __enter__(self):
  if self._pointer != NULL:
  raise RuntimeError(Memory not allocated)
 
  def __exit__(Heapmem self, type, value, traceback):
  if self._pointer != NULL:
  PyMem_Free(self._pointer)
  self._pointer = NULL
 
 
 
 
 
 ___
 NumPy-Discussion mailing list
 NumPy-Discussion@scipy.org
 http://mail.scipy.org/mailman/listinfo/numpy-discussion
 

___
NumPy-Discussion mailing list
NumPy-Discussion@scipy.org
http://mail.scipy.org/mailman/listinfo/numpy-discussion


Re: [Numpy-discussion] Should ndarray be a context manager?

2014-12-09 Thread Sturla Molden
On 09/12/14 18:39, Julian Taylor wrote:

 A context manager will also not help you with reference cycles.

It will because __exit__ is always executed. Even if the PyArrayObject 
struct lingers, the data buffer will be released.

Sturla


___
NumPy-Discussion mailing list
NumPy-Discussion@scipy.org
http://mail.scipy.org/mailman/listinfo/numpy-discussion


Re: [Numpy-discussion] Should ndarray be a context manager?

2014-12-09 Thread Robert Kern
On Tue, Dec 9, 2014 at 5:57 PM, Julian Taylor jtaylor.deb...@googlemail.com
wrote:

 On 09.12.2014 18:55, Sturla Molden wrote:
  On 09/12/14 18:39, Julian Taylor wrote:
 
  A context manager will also not help you with reference cycles.
 
   It will because __exit__ is always executed. Even if the PyArrayObject
  struct lingers, the data buffer will be released.

 a exit function would not delete the buffer, only decrease the reference
 count of the array. If something else still holds a reference it stays
 valid.
 Otherwise you would end up with a crash when the other object holding a
 reference tries to access it.

I believe that Sturla is proposing that the buffer (the data pointer) will
indeed be free()ed and the ndarray object be modified in-place to have an
empty shape. Most references won't matter, because they are opaque; e.g. a
frame being held by a caught traceback somewhere or whatever. These are
frequently the references that keep alive large arrays when we don't want them
to, and are hard to track down. The only place where you will get a crasher
is when other ndarray views on the original array are still around because
those are not opaque references.

The main problem I have is that this is much too likely to cause a segfault
to be part of the main API for ndarrays. I perhaps wouldn't mind a
non-public-API function hidden in numpy somewhere (but not in numpy.* or
even numpy.*.*) that did this. The user would put it into a finally:
clause, if that such things matters to them, instead of using it as a
context manager. Like as_strided(), which creates similar potential for
crashes, it should be not be casually available. This kind of action needs
to be an explicit statement
yes_i_dont_need_this_memory_anymore_references_be_damned() rather than
implicitly hidden behind generic syntax.

--
Robert Kern
___
NumPy-Discussion mailing list
NumPy-Discussion@scipy.org
http://mail.scipy.org/mailman/listinfo/numpy-discussion


Re: [Numpy-discussion] Should ndarray be a context manager?

2014-12-09 Thread Chris Barker
On Tue, Dec 9, 2014 at 7:01 AM, Sturla Molden sturla.mol...@gmail.com
wrote:


 I wonder if ndarray should be a context manager so we can write
 something like this:


 with np.zeros(n) as x:
[...]


 The difference should be that __exit__ should free the memory in x (if
 owned by x) and make x a zero size array.


my first thought is just that you can just do:

x = np.zeros(n)
[... your code here ]
del x

x's ref count will go down, and it will be deleted  if there are no other
references to it. If there Are other references to it, you really wouldn't
want to delete the memory buffer anyway, would you?

As it happens, CPython's reference counting scheme DOES enforce deletion at
determinate times.

I suppose you could write a generic context manger that would do the del
for you, but I'm not sure what the point would be.

Note that if numpy were to do this, then there would need to be machinery
in place to check for null data blocks in a numpy array -- kind of like how
a file object can close the underlying file pointer and not crash if
someone tries to use it again.

I guess this comes down to -- why would anyone want/need a numpy array
object with no underlying data?

(although I'm still confused as to why it's so important (in cPython) to
have a file context manager..)

-CHB







 Unlike the current ndarray, which does not have an __exit__ method, this
 would give precise control over when the memory is freed. The timing of
 the memory release would not be dependent on the Python implementation,
 and a reference cycle or reference leak would not accidentally produce a
 memory leak. It would allow us to deterministically decide when the
 memory should be freed, which e.g. is useful when we work with large
 arrays.


 A problem with this is that the memory in the ndarray would be volatile
 with respect to other Python threads and view arrays. However, there are
 dozens of other ways to produce segfaults or buffer overflows with NumPy
 (cf. stride_tricks or wrapping external buffers).


 Below is a Cython class that does something similar, but we would need
 to e.g. write something like

  with Heapmem(n * np.double().itemsize) as hm:
  x = hm.doublearray
  [...]

 instead of just

  with np.zeros(n) as x:
  [...]


 Sturla


 # (C) 2014 Sturla Molden

 from cpython cimport PyMem_Malloc, PyMem_Free
 from libc.string cimport memset
 cimport numpy as cnp
 cnp.init_array()


 cdef class Heapmem:

  cdef:
  void *_pointer
  cnp.intp_t _size

  def __cinit__(Heapmem self, Py_ssize_t n):
  self._pointer = NULL
  self._size = cnp.intp_t n

  def __init__(Heapmem self, Py_ssize_t n):
  self.allocate()

  def allocate(Heapmem self):
  if self._pointer != NULL:
  raise RuntimeError(Memory already allocated)
  else:
  self._pointer = PyMem_Malloc(self._size)
  if (self._pointer == NULL):
  raise MemoryError()
  memset(self._pointer, 0, self._size)

  def __dealloc__(Heapmem self):
  if self._pointer != NULL:
  PyMem_Free(self._pointer)
  self._pointer = NULL

  property pointer:
  def __get__(Heapmem self):
  return cnp.intp_t self._pointer

  property doublearray:
  def __get__(Heapmem self):
  cdef cnp.intp_t n = self._size//sizeof(double)
  if self._pointer != NULL:
  return cnp.PyArray_SimpleNewFromData(1, n,
   cnp.NPY_DOUBLE, self._pointer)
  else:
  raise RuntimeError(Memory not allocated)

  property chararray:
  def __get__(Heapmem self):
  if self._pointer != NULL:
  return cnp.PyArray_SimpleNewFromData(1, self._size,
   cnp.NPY_CHAR, self._pointer)
  else:
  raise RuntimeError(Memory not allocated)

  def __enter__(self):
  if self._pointer != NULL:
  raise RuntimeError(Memory not allocated)

  def __exit__(Heapmem self, type, value, traceback):
  if self._pointer != NULL:
  PyMem_Free(self._pointer)
  self._pointer = NULL





 ___
 NumPy-Discussion mailing list
 NumPy-Discussion@scipy.org
 http://mail.scipy.org/mailman/listinfo/numpy-discussion




-- 

Christopher Barker, Ph.D.
Oceanographer

Emergency Response Division
NOAA/NOS/ORR(206) 526-6959   voice
7600 Sand Point Way NE   (206) 526-6329   fax
Seattle, WA  98115   (206) 526-6317   main reception

chris.bar...@noaa.gov
___
NumPy-Discussion mailing list
NumPy-Discussion@scipy.org
http://mail.scipy.org/mailman/listinfo/numpy-discussion


Re: [Numpy-discussion] Should ndarray be a context manager?

2014-12-09 Thread Robert Kern
On Tue, Dec 9, 2014 at 8:15 PM, Chris Barker chris.bar...@noaa.gov wrote:

 (although I'm still confused as to why it's so important (in cPython) to
 have a file context manager..)


Because you want the file to close when the exception is raised and not at
some indeterminate point thereafter when the traceback stack frames finally
get disposed of, which can be an indefinitely long time.

-- 
Robert Kern
___
NumPy-Discussion mailing list
NumPy-Discussion@scipy.org
http://mail.scipy.org/mailman/listinfo/numpy-discussion


Re: [Numpy-discussion] Should ndarray be a context manager?

2014-12-09 Thread Sturla Molden
Chris Barker chris.bar...@noaa.gov wrote:

 my first thought is just that you can just do:
 
 x = np.zeros(n)
 [... your code here ]
 del x
 
 x's ref count will go down, and it will be deleted  if there are no other
 references to it. 

1. This depends on reference counting. PyPy supports numpy too (albeit with
its own code) and does not reference count.

2. del does not delete, it just decrements the refcount. x can still be
kept alive,

3. If x is a part of a reference cycle it is reclaimed later on.


 If there Are other references to it, you really wouldn't
 want to delete the memory buffer anyway, would you?

Same thing for file descriptors. For example consider what happens if you
memory map a file, then close the file, but continue to read and write to
the mapped address. NumPy allows us to construct these circumstances if we
want to.

 
 I suppose you could write a generic context manger that would do the del
 for you, but I'm not sure what the point would be.

A del is very different from a deallocation that actually disposes of the
data buffer, regardless of references to the memory that might still be
alive.


 I guess this comes down to -- why would anyone want/need a numpy array
 object with no underlying data?

I don't. The PyArrayObject struct is so small that I don't care about it.
But it could reference a huge data buffer, and I might want to get rid of
that more deterministically than just waiting for the gc.


 (although I'm still confused as to why it's so important (in cPython) to
 have a file context manager..)

Because we often want to run setup and teardown code deterministically,
rather than e.g. having it happen at random from the gc thread when it runs
the finalizer. If Python raises an exception, an io.file object can be kept
alive by the traceback for decades. If Python raises an exception, an
acquire/release pair for a threading.Lock can be separated, and the lock
ends up in an undefined state further down in your code.

In what I suggested the setup and teardown code would be malloc() and
free().


Sturla

___
NumPy-Discussion mailing list
NumPy-Discussion@scipy.org
http://mail.scipy.org/mailman/listinfo/numpy-discussion


Re: [Numpy-discussion] Should ndarray be a context manager?

2014-12-09 Thread Nathaniel Smith
On 9 Dec 2014 15:03, Sturla Molden sturla.mol...@gmail.com wrote:


 I wonder if ndarray should be a context manager so we can write
 something like this:


 with np.zeros(n) as x:
[...]


 The difference should be that __exit__ should free the memory in x (if
 owned by x) and make x a zero size array.

Regardless of whether this functionality is provided as part of numpy, I
don't much like the idea of putting __enter__ and __exit__ methods on
ndarray itself. It's just very confusing - I had no idea what 'with arr'
would mean when I saw the thread subject. It's much clearer and easier to
document if one uses a special context manager just for this, like:

with tmp_zeros(...) as arr:
...

This should be pretty trivial to implement. AFAICT you don't need any
complicated cython, you just need:

@contextmanager
def tmp_zeros(*args, **kwargs):
arr = np.zeros(*args, **kwargs)
try:
yield arr
finally:
arr.resize((0,), check_refs=False)

Given how intrinsically dangerous this is, and how easily it can be
implemented using numpy's existing public API, I think maybe we should
leave this for third-party daredevils instead of implementing it in numpy
proper.

-n
___
NumPy-Discussion mailing list
NumPy-Discussion@scipy.org
http://mail.scipy.org/mailman/listinfo/numpy-discussion


Re: [Numpy-discussion] Should ndarray be a context manager?

2014-12-09 Thread Sturla Molden
Nathaniel Smith n...@pobox.com wrote:

 @contextmanager
 def tmp_zeros(*args, **kwargs):
 arr = np.zeros(*args, **kwargs)
 try:
 yield arr
 finally:
 arr.resize((0,), check_refs=False)

That one is interesting. I have actually never used ndarray.resize(). It
did not even occur to me that such an abomination existed :-)

Sturla

___
NumPy-Discussion mailing list
NumPy-Discussion@scipy.org
http://mail.scipy.org/mailman/listinfo/numpy-discussion


Re: [Numpy-discussion] Should ndarray be a context manager?

2014-12-09 Thread Sturla Molden
Nathaniel Smith n...@pobox.com wrote:

 This should be pretty trivial to implement. AFAICT you don't need any
 complicated cython

I have a bad habit of thinking in terms of too complicated C instead of
just using NumPy.


 @contextmanager
 def tmp_zeros(*args, **kwargs):
 arr = np.zeros(*args, **kwargs)
 try:
 yield arr
 finally:
 arr.resize((0,), check_refs=False)
 
 Given how intrinsically dangerous this is, and how easily it can be
 implemented using numpy's existing public API, I think maybe we should
 leave this for third-party daredevils instead of implementing it in numpy
 proper.

It seems so :-)


Sturla

___
NumPy-Discussion mailing list
NumPy-Discussion@scipy.org
http://mail.scipy.org/mailman/listinfo/numpy-discussion