Author: Armin Rigo <[email protected]>
Branch: concurrent-marksweep
Changeset: r47976:89b12cccb3b1
Date: 2011-10-12 19:09 +0200
http://bitbucket.org/pypy/pypy/changeset/89b12cccb3b1/

Log:    Bah. A few hours of gdb'ing a pypy-c turned out this bug, that I
        explain in a one-page comment.

diff --git a/pypy/rpython/memory/gc/concurrentms.py 
b/pypy/rpython/memory/gc/concurrentms.py
--- a/pypy/rpython/memory/gc/concurrentms.py
+++ b/pypy/rpython/memory/gc/concurrentms.py
@@ -432,7 +432,9 @@
     #    XXX
 
     #def shrink_array(self, obj, smallerlength):
-    #    return False
+    #    no real point in supporting this, but if you think it's a good
+    #    idea, remember that changing the array length at run-time needs
+    #    extra care for the collector thread
 
     def enumerate_all_roots(self, callback, arg):
         self.prebuilt_root_objects.foreach(callback, arg)
@@ -471,6 +473,7 @@
     grow_reservation._always_inline_ = True
 
     def deletion_barrier(self, addr_struct):
+        # XXX check the assembler
         mark = self.header(addr_struct).tid & 0xFF
         if mark != self.current_mark:
             self.force_scan(addr_struct)
@@ -506,8 +509,16 @@
                     ll_assert(self.collection_running == 1,
                               "write barrier: wrong call?")
                     #
+                    # It's fine to set the mark before tracing, because
+                    # we are anyway in a 'mutex_lock' critical section.
+                    # The collector thread will not exit the phase
+                    # 'collection_running == 1' here.
                     self.set_mark(obj, self.current_mark)
                     self.trace(obj, self._barrier_add_extra, None)
+                    #
+                    # Still at 1:
+                    ll_assert(self.collection_running == 1,
+                              "write barrier: oups!?")
                 #
             self.release(self.mutex_lock)
             #debug_print("deletion_barrier done ", obj)
@@ -516,7 +527,9 @@
         self.force_scan = force_scan
 
     def _barrier_add_extra(self, root, ignored):
-        self.extra_objects_to_mark.append(root.address[0])
+        obj = root.address[0]
+        self.get_mark(obj)
+        self.extra_objects_to_mark.append(obj)
 
 
     def wait_for_the_end_of_collection(self):
@@ -571,6 +584,8 @@
             # can start the next collection, and then this function returns
             # with a collection in progress, which it should not.  Be careful
             # to call execute_finalizers_ll() in the caller somewhere.
+            ll_assert(self.collection_running == 0,
+                      "collector thread not paused?")
 
 
     def execute_finalizers_ll(self):
@@ -637,6 +652,7 @@
             x = llarena.getfakearenaaddress(x) + 8
             obj = x + self.gcheaderbuilder.size_gc_header
             #debug_print("_objects_with_finalizers_to_run", obj)
+            self.get_mark(obj)
             self.gray_objects.append(obj)
             p = list_next(p)
         #
@@ -672,10 +688,12 @@
     def _add_stack_root(self, root):
         obj = root.address[0]
         #debug_print("_add_stack_root", obj)
+        self.get_mark(obj)
         self.gray_objects.append(obj)
 
     def _add_prebuilt_root(self, obj, ignored):
         #debug_print("_add_prebuilt_root", obj)
+        self.get_mark(obj)
         self.gray_objects.append(obj)
 
     def debug_check_lists(self):
@@ -780,14 +798,17 @@
         return mark ^ (MARK_VALUE_1 ^ MARK_VALUE_2)
 
     def is_marked(self, obj, current_mark):
-        mark = self.header(obj).tid & 0xFF
-        ll_assert(mark in (MARK_VALUE_1, MARK_VALUE_2, MARK_VALUE_STATIC),
-                  "bad mark byte in object")
-        return mark == current_mark
+        return self.get_mark(obj) == current_mark
 
     def set_mark(self, obj, newmark):
         _set_mark(self.header(obj), newmark)
 
+    def get_mark(self, obj):
+        mark = self.header(obj).tid & 0xFF
+        ll_assert(mark == MARK_VALUE_1 or mark == MARK_VALUE_2 or
+                  mark == MARK_VALUE_STATIC, "bad mark byte in object")
+        return mark
+
     def collector_mark(self):
         while True:
             #
@@ -804,6 +825,7 @@
             #debug_print("...collector thread has mutex_lock")
             while self.extra_objects_to_mark.non_empty():
                 obj = self.extra_objects_to_mark.pop()
+                self.get_mark(obj)
                 self.gray_objects.append(obj)
             self.release(self.mutex_lock)
             #
@@ -821,8 +843,30 @@
         while self.gray_objects.non_empty():
             obj = self.gray_objects.pop()
             if not self.is_marked(obj, current_mark):
+                #
+                # Scan the content of 'obj'.  We use a snapshot-at-the-
+                # beginning order, meaning that we want to scan the state
+                # of the object as it was at the beginning of the current
+                # collection --- and not the current state, which might have
+                # been modified.  That's why we have a deletion barrier:
+                # when the mutator thread is about to change an object that
+                # is not yet marked, it will itself do the scanning of just
+                # this object, and mark the object.  But this function is not
+                # synchronized, which means that in some rare cases it's
+                # possible that the object is scanned a second time here
+                # (harmlessly).
+                #
+                # The order of the next two lines is essential!  *First*
+                # scan the object, adding all objects found to gray_objects;
+                # and *only then* set the mark.  This is essential, because
+                # otherwise, we might set the mark, then the main thread
+                # thinks a force_scan() is not necessary and modifies the
+                # content of 'obj', and then here in the collector thread
+                # we scan a modified content --- and the original content
+                # is never scanned.
+                #
+                self.trace(obj, self._collect_add_pending, None)
                 self.set_mark(obj, current_mark)
-                self.trace(obj, self._collect_add_pending, None)
                 #
                 # Interrupt early if the mutator's write barrier adds stuff
                 # to that list.  Note that the check is imprecise because
@@ -834,7 +878,9 @@
                     return
 
     def _collect_add_pending(self, root, ignored):
-        self.gray_objects.append(root.address[0])
+        obj = root.address[0]
+        self.get_mark(obj)
+        self.gray_objects.append(obj)
 
     def collector_sweep(self):
         n = 1
@@ -969,6 +1015,7 @@
                 if self.collect_run_finalizers_tail == self.NULL:
                     self.collect_run_finalizers_tail = finalizer_page
                 obj = x + size_gc_header
+                self.get_mark(obj)
                 self.gray_objects.append(obj)
             else:
                 # marked: relink into the collect_finalizer_pages list
diff --git a/pypy/translator/c/src/debug_lltrace.h 
b/pypy/translator/c/src/debug_lltrace.h
--- a/pypy/translator/c/src/debug_lltrace.h
+++ b/pypy/translator/c/src/debug_lltrace.h
@@ -14,6 +14,8 @@
 
 #else /*******************************************************/
 
+#  include "src/atomic_ops.h"
+
 
 #  define RPY_IS_TRACING           1
 #  define RPyTraceSet(ptr, mark)   _RPyTraceSet(&(ptr), (long)(ptr), mark)
@@ -28,35 +30,55 @@
 
 static struct _RPyTrace_s *_RPyTrace_start   = NULL;
 static struct _RPyTrace_s *_RPyTrace_stop    = NULL;
-static struct _RPyTrace_s *_RPyTrace_current = NULL;
-static const long _RPyTrace_default_size = 134217728;
+static struct _RPyTrace_s * volatile _RPyTrace_current = NULL;
+static const long _RPyTrace_default_size = 0x4000000;
 
-void _RPyTrace_WrapAround(void)
+void _RPyTrace_Setup(void)
 {
-  if (_RPyTrace_start == NULL)
-    {
-      char *csize = getenv("PYPYTRACEBUF");
-      long size = csize ? atol(csize) : 0;
-      if (size <= 1)
+    /* not thread-safe, but assume that it's called very early anyway,
+       when there is only one thread */
+    char *csize = getenv("PYPYTRACEBUF");
+    long size = csize ? atol(csize) : 0;
+    if (size <= 1)
         size = _RPyTrace_default_size;
-      _RPyTrace_start = malloc(size * sizeof(struct _RPyTrace_s));
-      RPyAssert(_RPyTrace_start, "not enough memory to allocate the trace");
-      _RPyTrace_stop = _RPyTrace_start + size;
-    }
-  _RPyTrace_current = _RPyTrace_start;
-  fprintf(stderr, "lltrace: buffer from %p to %p, size %ld entries\n",
-          _RPyTrace_start, _RPyTrace_stop,
-          (long)(_RPyTrace_stop - _RPyTrace_start));
+    _RPyTrace_start = calloc(size, sizeof(struct _RPyTrace_s));
+    RPyAssert(_RPyTrace_start, "not enough memory to allocate the trace");
+    _RPyTrace_stop = _RPyTrace_start + size;
+
+    _RPyTrace_current = _RPyTrace_start;
+    fprintf(stderr, "lltrace: buffer from %p to %p, size %ld entries\n",
+            _RPyTrace_start, _RPyTrace_stop,
+            (long)(_RPyTrace_stop - _RPyTrace_start));
 }
 
 void _RPyTraceSet(void *addr, long newvalue, long mark)
 {
-  if (_RPyTrace_current == _RPyTrace_stop)
-    _RPyTrace_WrapAround();
-  _RPyTrace_current->mark = mark;
-  _RPyTrace_current->addr = addr;
-  _RPyTrace_current->newvalue = newvalue;
-  ++_RPyTrace_current;
+    struct _RPyTrace_s *current, *next;
+    do {
+        current = _RPyTrace_current;
+        if (current == NULL) {
+            _RPyTrace_Setup();
+            current = _RPyTrace_current;
+        }
+        next = current + 1;
+        if (next == _RPyTrace_stop) next = _RPyTrace_start;
+    } while (!bool_cas((volatile unsigned long*)&_RPyTrace_current,
+                       (unsigned long)current,
+                       (unsigned long)next));
+    current->mark = mark;
+    current->addr = addr;
+    current->newvalue = newvalue;
+}
+
+void _RPyTraceDump(void)
+{
+    char *start   = _RPyTrace_start;
+    char *stop    = _RPyTrace_stop;
+    char *current = (char *)_RPyTrace_current;
+    FILE *f       = fopen("trace.dump", "wb");
+    fwrite(current, 1, stop - current, f);
+    fwrite(start,   1, current - start, f);
+    fclose(f);
 }
 
 #  endif
diff --git a/pypy/translator/c/test/test_newgc.py 
b/pypy/translator/c/test/test_newgc.py
--- a/pypy/translator/c/test/test_newgc.py
+++ b/pypy/translator/c/test/test_newgc.py
@@ -597,7 +597,8 @@
             p = lltype.malloc(B)
             llop.gc__collect(lltype.Void)
             p.a = lltype.malloc(A)
-            return p.a.x
+            n = p.a.x
+            return n + (~n)    # always -1
         return f
 
     def test_framework_late_filling_pointers(self):
@@ -1300,7 +1301,7 @@
         return f
 
     def test_gc_heap_stats(self):
-        res = self.run("gc_heap_stats")
+        res = self.run("gc_heap_stats", repetitions=1)
         assert res == 3011 or res == 3021
 
     def definestr_string_builder(cls):
@@ -1539,4 +1540,4 @@
 
 class TestMostlyConcurrentMarkSweepGC(TestUsingFramework):
     gcpolicy = "concurrentms"
-    repetitions = 10
+    repetitions = 100
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to