Author: Armin Rigo <[email protected]>
Branch: concurrent-marksweep
Changeset: r47976:89b12cccb3b1
Date: 2011-10-12 19:09 +0200
http://bitbucket.org/pypy/pypy/changeset/89b12cccb3b1/
Log: Bah. A few hours of gdb'ing a pypy-c turned up this bug, which I
explain in a one-page comment.
diff --git a/pypy/rpython/memory/gc/concurrentms.py
b/pypy/rpython/memory/gc/concurrentms.py
--- a/pypy/rpython/memory/gc/concurrentms.py
+++ b/pypy/rpython/memory/gc/concurrentms.py
@@ -432,7 +432,9 @@
# XXX
#def shrink_array(self, obj, smallerlength):
- # return False
+ # no real point in supporting this, but if you think it's a good
+ # idea, remember that changing the array length at run-time needs
+ # extra care for the collector thread
def enumerate_all_roots(self, callback, arg):
self.prebuilt_root_objects.foreach(callback, arg)
@@ -471,6 +473,7 @@
grow_reservation._always_inline_ = True
def deletion_barrier(self, addr_struct):
+ # XXX check the assembler
mark = self.header(addr_struct).tid & 0xFF
if mark != self.current_mark:
self.force_scan(addr_struct)
@@ -506,8 +509,16 @@
ll_assert(self.collection_running == 1,
"write barrier: wrong call?")
#
+ # It's fine to set the mark before tracing, because
+ # we are anyway in a 'mutex_lock' critical section.
+ # The collector thread will not exit the phase
+ # 'collection_running == 1' here.
self.set_mark(obj, self.current_mark)
self.trace(obj, self._barrier_add_extra, None)
+ #
+ # Still at 1:
+ ll_assert(self.collection_running == 1,
+ "write barrier: oups!?")
#
self.release(self.mutex_lock)
#debug_print("deletion_barrier done ", obj)
@@ -516,7 +527,9 @@
self.force_scan = force_scan
def _barrier_add_extra(self, root, ignored):
- self.extra_objects_to_mark.append(root.address[0])
+ obj = root.address[0]
+ self.get_mark(obj)
+ self.extra_objects_to_mark.append(obj)
def wait_for_the_end_of_collection(self):
@@ -571,6 +584,8 @@
# can start the next collection, and then this function returns
# with a collection in progress, which it should not. Be careful
# to call execute_finalizers_ll() in the caller somewhere.
+ ll_assert(self.collection_running == 0,
+ "collector thread not paused?")
def execute_finalizers_ll(self):
@@ -637,6 +652,7 @@
x = llarena.getfakearenaaddress(x) + 8
obj = x + self.gcheaderbuilder.size_gc_header
#debug_print("_objects_with_finalizers_to_run", obj)
+ self.get_mark(obj)
self.gray_objects.append(obj)
p = list_next(p)
#
@@ -672,10 +688,12 @@
def _add_stack_root(self, root):
obj = root.address[0]
#debug_print("_add_stack_root", obj)
+ self.get_mark(obj)
self.gray_objects.append(obj)
def _add_prebuilt_root(self, obj, ignored):
#debug_print("_add_prebuilt_root", obj)
+ self.get_mark(obj)
self.gray_objects.append(obj)
def debug_check_lists(self):
@@ -780,14 +798,17 @@
return mark ^ (MARK_VALUE_1 ^ MARK_VALUE_2)
def is_marked(self, obj, current_mark):
- mark = self.header(obj).tid & 0xFF
- ll_assert(mark in (MARK_VALUE_1, MARK_VALUE_2, MARK_VALUE_STATIC),
- "bad mark byte in object")
- return mark == current_mark
+ return self.get_mark(obj) == current_mark
def set_mark(self, obj, newmark):
_set_mark(self.header(obj), newmark)
+ def get_mark(self, obj):
+ mark = self.header(obj).tid & 0xFF
+ ll_assert(mark == MARK_VALUE_1 or mark == MARK_VALUE_2 or
+ mark == MARK_VALUE_STATIC, "bad mark byte in object")
+ return mark
+
def collector_mark(self):
while True:
#
@@ -804,6 +825,7 @@
#debug_print("...collector thread has mutex_lock")
while self.extra_objects_to_mark.non_empty():
obj = self.extra_objects_to_mark.pop()
+ self.get_mark(obj)
self.gray_objects.append(obj)
self.release(self.mutex_lock)
#
@@ -821,8 +843,30 @@
while self.gray_objects.non_empty():
obj = self.gray_objects.pop()
if not self.is_marked(obj, current_mark):
+ #
+ # Scan the content of 'obj'. We use a snapshot-at-the-
+ # beginning order, meaning that we want to scan the state
+ # of the object as it was at the beginning of the current
+ # collection --- and not the current state, which might have
+ # been modified. That's why we have a deletion barrier:
+ # when the mutator thread is about to change an object that
+ # is not yet marked, it will itself do the scanning of just
+ # this object, and mark the object. But this function is not
+ # synchronized, which means that in some rare cases it's
+ # possible that the object is scanned a second time here
+ # (harmlessly).
+ #
+ # The order of the next two lines is essential! *First*
+ # scan the object, adding all objects found to gray_objects;
+ # and *only then* set the mark. This is essential, because
+ # otherwise, we might set the mark, then the main thread
+ # thinks a force_scan() is not necessary and modifies the
+ # content of 'obj', and then here in the collector thread
+ # we scan a modified content --- and the original content
+ # is never scanned.
+ #
+ self.trace(obj, self._collect_add_pending, None)
self.set_mark(obj, current_mark)
- self.trace(obj, self._collect_add_pending, None)
#
# Interrupt early if the mutator's write barrier adds stuff
# to that list. Note that the check is imprecise because
@@ -834,7 +878,9 @@
return
def _collect_add_pending(self, root, ignored):
- self.gray_objects.append(root.address[0])
+ obj = root.address[0]
+ self.get_mark(obj)
+ self.gray_objects.append(obj)
def collector_sweep(self):
n = 1
@@ -969,6 +1015,7 @@
if self.collect_run_finalizers_tail == self.NULL:
self.collect_run_finalizers_tail = finalizer_page
obj = x + size_gc_header
+ self.get_mark(obj)
self.gray_objects.append(obj)
else:
# marked: relink into the collect_finalizer_pages list
diff --git a/pypy/translator/c/src/debug_lltrace.h
b/pypy/translator/c/src/debug_lltrace.h
--- a/pypy/translator/c/src/debug_lltrace.h
+++ b/pypy/translator/c/src/debug_lltrace.h
@@ -14,6 +14,8 @@
#else /*******************************************************/
+# include "src/atomic_ops.h"
+
# define RPY_IS_TRACING 1
# define RPyTraceSet(ptr, mark) _RPyTraceSet(&(ptr), (long)(ptr), mark)
@@ -28,35 +30,55 @@
static struct _RPyTrace_s *_RPyTrace_start = NULL;
static struct _RPyTrace_s *_RPyTrace_stop = NULL;
-static struct _RPyTrace_s *_RPyTrace_current = NULL;
-static const long _RPyTrace_default_size = 134217728;
+static struct _RPyTrace_s * volatile _RPyTrace_current = NULL;
+static const long _RPyTrace_default_size = 0x4000000;
-void _RPyTrace_WrapAround(void)
+void _RPyTrace_Setup(void)
{
- if (_RPyTrace_start == NULL)
- {
- char *csize = getenv("PYPYTRACEBUF");
- long size = csize ? atol(csize) : 0;
- if (size <= 1)
+ /* not thread-safe, but assume that it's called very early anyway,
+ when there is only one thread */
+ char *csize = getenv("PYPYTRACEBUF");
+ long size = csize ? atol(csize) : 0;
+ if (size <= 1)
size = _RPyTrace_default_size;
- _RPyTrace_start = malloc(size * sizeof(struct _RPyTrace_s));
- RPyAssert(_RPyTrace_start, "not enough memory to allocate the trace");
- _RPyTrace_stop = _RPyTrace_start + size;
- }
- _RPyTrace_current = _RPyTrace_start;
- fprintf(stderr, "lltrace: buffer from %p to %p, size %ld entries\n",
- _RPyTrace_start, _RPyTrace_stop,
- (long)(_RPyTrace_stop - _RPyTrace_start));
+ _RPyTrace_start = calloc(size, sizeof(struct _RPyTrace_s));
+ RPyAssert(_RPyTrace_start, "not enough memory to allocate the trace");
+ _RPyTrace_stop = _RPyTrace_start + size;
+
+ _RPyTrace_current = _RPyTrace_start;
+ fprintf(stderr, "lltrace: buffer from %p to %p, size %ld entries\n",
+ _RPyTrace_start, _RPyTrace_stop,
+ (long)(_RPyTrace_stop - _RPyTrace_start));
}
void _RPyTraceSet(void *addr, long newvalue, long mark)
{
- if (_RPyTrace_current == _RPyTrace_stop)
- _RPyTrace_WrapAround();
- _RPyTrace_current->mark = mark;
- _RPyTrace_current->addr = addr;
- _RPyTrace_current->newvalue = newvalue;
- ++_RPyTrace_current;
+ struct _RPyTrace_s *current, *next;
+ do {
+ current = _RPyTrace_current;
+ if (current == NULL) {
+ _RPyTrace_Setup();
+ current = _RPyTrace_current;
+ }
+ next = current + 1;
+ if (next == _RPyTrace_stop) next = _RPyTrace_start;
+ } while (!bool_cas((volatile unsigned long*)&_RPyTrace_current,
+ (unsigned long)current,
+ (unsigned long)next));
+ current->mark = mark;
+ current->addr = addr;
+ current->newvalue = newvalue;
+}
+
+void _RPyTraceDump(void)
+{
+ char *start = _RPyTrace_start;
+ char *stop = _RPyTrace_stop;
+ char *current = (char *)_RPyTrace_current;
+ FILE *f = fopen("trace.dump", "wb");
+ fwrite(current, 1, stop - current, f);
+ fwrite(start, 1, current - start, f);
+ fclose(f);
}
# endif
diff --git a/pypy/translator/c/test/test_newgc.py
b/pypy/translator/c/test/test_newgc.py
--- a/pypy/translator/c/test/test_newgc.py
+++ b/pypy/translator/c/test/test_newgc.py
@@ -597,7 +597,8 @@
p = lltype.malloc(B)
llop.gc__collect(lltype.Void)
p.a = lltype.malloc(A)
- return p.a.x
+ n = p.a.x
+ return n + (~n) # always -1
return f
def test_framework_late_filling_pointers(self):
@@ -1300,7 +1301,7 @@
return f
def test_gc_heap_stats(self):
- res = self.run("gc_heap_stats")
+ res = self.run("gc_heap_stats", repetitions=1)
assert res == 3011 or res == 3021
def definestr_string_builder(cls):
@@ -1539,4 +1540,4 @@
class TestMostlyConcurrentMarkSweepGC(TestUsingFramework):
gcpolicy = "concurrentms"
- repetitions = 10
+ repetitions = 100
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit