#13387: Improve MonoDict and TripleDict data structures
----------------------------------+-----------------------------------------
       Reporter:  nbruin          |         Owner:  Nils Bruin
           Type:  enhancement     |        Status:  needs_work
       Priority:  major           |     Milestone:  sage-5.8  
      Component:  memleak         |    Resolution:            
       Keywords:                  |   Work issues:  rebase    
Report Upstream:  N/A             |     Reviewers:            
        Authors:  Nils Bruin      |     Merged in:            
   Dependencies:  #11521, #12313  |      Stopgaps:            
----------------------------------+-----------------------------------------
Changes (by SimonKing):

  * status:  needs_review => needs_work
  * work_issues:  => rebase


Comment:

 It seems that #12313 is merged in sage-5.8.beta0. But the first patch does
 not apply, 2 out of 30 hunks fail.
 {{{
 #!diff
 --- coerce_dict.pyx
 +++ coerce_dict.pyx
 @@ -115,27 +137,31 @@ cdef class MonoDictEraser:
          # and it knows the stored key of the unique singleton r() had
 been part
          # of.
          # We remove that unique tuple from self.D -- if self.D is still
 there!
 -        cdef MonoDict D = self.D()
 +
 +        #WARNING! These callbacks can happen during the addition of items
 to the same
 +        #dictionary. The addition code may then be in the process of
 adding a new entry
 +        #(one PyList_Append call at a time) to the relevant bucket.
 +        #The "PyList_GET_SIZE(bucket) by 3" call should mean that we
 round down and hence not look
 +        #at incomplete entries. Furthermore, deleting a slice out of a
 buck should still be OK.
 +        #this callback code should absolutely not resize the dictionary,
 because that would wreak
 +        #complete havoc.
 +
 +        cdef MonoDict D = <object> PyWeakref_GetObject(self.D)
          if D is None:
              return
          cdef list buckets = D.buckets
          if buckets is None:
              return
 -        cdef size_t k = r.key
 -        cdef size_t h = k % PyList_GET_SIZE(buckets)
 -        cdef list bucket = <object>PyList_GET_ITEM(buckets,h)
 +        cdef Py_ssize_t h = r.key
 +        cdef list bucket = <object>PyList_GET_ITEM(buckets, (<size_t>h) %
 PyList_GET_SIZE(buckets))
          cdef Py_ssize_t i
 -        for i from 0 <= i < PyList_GET_SIZE(bucket) by 2:
 -            if <size_t><object>PyList_GET_ITEM(bucket,i)==k:
 -                del bucket[i:i+2]
 +        for i from 0 <= i < PyList_GET_SIZE(bucket) by 3:
 +            if PyInt_AsSsize_t(PyList_GET_ITEM(bucket,i))==h:
 +                del bucket[i:i+3]
                  D._size -= 1
                  break
 -        try:
 -            D._refcache.__delitem__(k)
 -        except KeyError:
 -            pass

 -cdef class TripleDictEraser(MonoDictEraser):
 +cdef class TripleDictEraser:
      """
      Erases items from a :class:`TripleDict` when a weak reference becomes
      invalid.
 @@ -216,32 +251,30 @@ cdef class TripleDictEraser(MonoDictEras
          # r is a (weak) reference (typically to a parent), and it knows
 the
          # stored key of the unique triple r() had been part of.
          # We remove that unique triple from self.D
 -        cdef size_t k1,k2,k3
 +        cdef Py_ssize_t k1,k2,k3
          k1,k2,k3 = r.key
 -        cdef size_t h = (k1 + 13*k2 ^ 503*k3)
 -        cdef list bucket = <object>PyList_GET_ITEM(buckets, h %
 PyList_GET_SIZE(buckets))
 +        cdef Py_ssize_t h = (k1 + 13*k2 ^ 503*k3)
 +        cdef list bucket = <object>PyList_GET_ITEM(buckets, (<size_t>h) %
 PyList_GET_SIZE(buckets))
          cdef Py_ssize_t i
 -        for i from 0 <= i < PyList_GET_SIZE(bucket) by 4:
 -            if <size_t><object>PyList_GET_ITEM(bucket, i)==k1 and \
 -               <size_t><object>PyList_GET_ITEM(bucket, i+1)==k2 and \
 -               <size_t><object>PyList_GET_ITEM(bucket, i+2)==k3:
 -                del bucket[i:i+4]
 +        for i from 0 <= i < PyList_GET_SIZE(bucket) by 7:
 +            if PyInt_AsSsize_t(PyList_GET_ITEM(bucket, i))==k1 and \
 +               PyInt_AsSsize_t(PyList_GET_ITEM(bucket, i+1))==k2 and \
 +               PyInt_AsSsize_t(PyList_GET_ITEM(bucket, i+2))==k3:
 +                del bucket[i:i+7]
                  D._size -= 1
                  break
 -        try:
 -            D._refcache.__delitem__((k1,k2,k3))
 -        except KeyError:
 -            pass

  cdef class MonoDict:
      """
      This is a hashtable specifically designed for (read) speed in
      the coercion model.

 -    It differs from a python dict in the following important way:
 +    It differs from a python WeakKeyDictionary in the following important
 way:

         - Comparison is done using the 'is' rather than '==' operator.
 -       - There are only weak references (if possible) to the keys.
 +       - Only weak references to the keys are stored if at all possible.
 +         Keys that do not allow for weak references are stored with a
 normal
 +         refcounted reference.

      There are special cdef set/get methods for faster access.
      It is bare-bones in the sense that not all dictionary methods are
 }}}

-- 
Ticket URL: <http://trac.sagemath.org/sage_trac/ticket/13387#comment:42>
Sage <http://www.sagemath.org>
Sage: Creating a Viable Open Source Alternative to Magma, Maple, Mathematica, 
and MATLAB

-- 
You received this message because you are subscribed to the Google Groups 
"sage-trac" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To post to this group, send email to [email protected].
Visit this group at http://groups.google.com/group/sage-trac?hl=en.
For more options, visit https://groups.google.com/groups/opt_out.


Reply via email to