Author: Armin Rigo <[email protected]>
Branch: stmgc-c7
Changeset: r72294:a006a6263e15
Date: 2014-06-30 18:41 +0200
http://bitbucket.org/pypy/pypy/changeset/a006a6263e15/

Log:    in-progress

diff --git a/rpython/jit/backend/llsupport/gc.py b/rpython/jit/backend/llsupport/gc.py
--- a/rpython/jit/backend/llsupport/gc.py
+++ b/rpython/jit/backend/llsupport/gc.py
@@ -302,9 +302,11 @@
                 self.extract_flag_byte(self.jit_wb_cards_set))
             #
             # the x86 backend uses the following "accidental" facts to
-            # avoid one instruction:
-            assert self.jit_wb_cards_set_byteofs == self.jit_wb_if_flag_byteofs
-            assert self.jit_wb_cards_set_singlebyte == -0x80
+            # avoid one instruction (not with stm):
+            if not gc_ll_descr.stm:
+                assert (self.jit_wb_cards_set_byteofs ==
+                        self.jit_wb_if_flag_byteofs)
+                assert self.jit_wb_cards_set_singlebyte == -0x80
         else:
             self.jit_wb_cards_set = 0
 
diff --git a/rpython/jit/backend/llsupport/rewrite.py b/rpython/jit/backend/llsupport/rewrite.py
--- a/rpython/jit/backend/llsupport/rewrite.py
+++ b/rpython/jit/backend/llsupport/rewrite.py
@@ -416,18 +416,14 @@
             self.gen_write_barrier(val, op.stm_location)
         self.newops.append(op)
 
-    def handle_write_barrier_setinteriorfield(self, op):
-        val = op.getarg(0)
-        if self.must_apply_write_barrier(val, op.getarg(2)):
-            self.gen_write_barrier(val, op.stm_location)
-        self.newops.append(op)
-
     def handle_write_barrier_setarrayitem(self, op):
         val = op.getarg(0)
         if self.must_apply_write_barrier(val, op.getarg(2)):
             self.gen_write_barrier_array(val, op.getarg(1), op.stm_location)
         self.newops.append(op)
 
+    handle_write_barrier_setinteriorfield = handle_write_barrier_setarrayitem
+
     def gen_write_barrier(self, v_base, stm_location):
         write_barrier_descr = self.gc_ll_descr.write_barrier_descr
         args = [v_base]
diff --git a/rpython/jit/backend/llsupport/test/test_rewrite.py b/rpython/jit/backend/llsupport/test/test_rewrite.py
--- a/rpython/jit/backend/llsupport/test/test_rewrite.py
+++ b/rpython/jit/backend/llsupport/test/test_rewrite.py
@@ -678,7 +678,7 @@
             jump(p1, p2)
         """, """
             [p1, p2]
-            cond_call_gc_wb(p1, descr=wbdescr)
+            cond_call_gc_wb_array(p1, 0, descr=wbdescr)
             setinteriorfield_gc(p1, 0, p2, descr=interiorzdescr)
             jump(p1, p2)
         """, interiorzdescr=interiorzdescr)
diff --git a/rpython/jit/backend/llsupport/test/test_stmrewrite.py b/rpython/jit/backend/llsupport/test/test_stmrewrite.py
--- a/rpython/jit/backend/llsupport/test/test_stmrewrite.py
+++ b/rpython/jit/backend/llsupport/test/test_stmrewrite.py
@@ -51,7 +51,7 @@
         self.gc_ll_descr = GcLLDescr_framework(gcdescr, None, None, None,
                                                really_not_translated=True)
         self.gc_ll_descr.write_barrier_descr.has_write_barrier_from_array = (
-            lambda cpu: False)   # for now
+            lambda cpu: True)
         self.gc_ll_descr.minimal_size_in_nursery = 16
         #
         class FakeCPU(BaseFakeCPU):
@@ -515,9 +515,9 @@
             jump()
         """, """
             [p1, i1, p2, p3, i3, p4]
-            cond_call_gc_wb(p1, descr=wbdescr)
+            cond_call_gc_wb_array(p1, i1, descr=wbdescr)
             setarrayitem_gc(p1, i1, p2, descr=adescr)
-            cond_call_gc_wb(p3, descr=wbdescr)
+            cond_call_gc_wb_array(p3, i3, descr=wbdescr)
             setarrayitem_gc(p3, i3, p4, descr=adescr)
 
             jump()
@@ -532,9 +532,10 @@
             jump()
         """, """
             [p1, p2, i2, p3, i3]
-            cond_call_gc_wb(p1, descr=wbdescr)
+            cond_call_gc_wb_array(p1, i2, descr=wbdescr)
             setarrayitem_gc(p1, i2, p2, descr=adescr)
             i4 = read_timestamp()
+            cond_call_gc_wb_array(p1, i3, descr=wbdescr)
             setarrayitem_gc(p1, i3, p3, descr=adescr)
 
             jump()
@@ -549,9 +550,10 @@
             jump()
         """, """
             [p1, p2, i2, p3, i3]
-            cond_call_gc_wb(p1, descr=wbdescr)
+            cond_call_gc_wb_array(p1, i2, descr=wbdescr)
             setinteriorfield_gc(p1, i2, p2, descr=intzdescr)
             i4 = read_timestamp()
+            cond_call_gc_wb_array(p1, i3, descr=wbdescr)
             setinteriorfield_gc(p1, i3, p3, descr=intzdescr)
 
             jump()
@@ -1115,7 +1117,7 @@
             setfield_gc(p1, 8111, descr=tiddescr)
             setfield_gc(p1, 5, descr=clendescr)
             label(p1, i2, p3)
-            cond_call_gc_wb(p1, descr=wbdescr)
+            cond_call_gc_wb_array(p1, i2, descr=wbdescr)
             setarrayitem_gc(p1, i2, p3, descr=cdescr)
         """)
 
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -451,13 +451,17 @@
             # A final TEST8 before the RET, for the caller.  Careful to
             # not follow this instruction with another one that changes
             # the status of the CPU flags!
-            if IS_X86_32:
-                mc.MOV_rs(eax.value, 3*WORD)
+            if stm:
+                mc.TEST8_rr(eax.value | BYTE_REG_FLAG,
+                            eax.value | BYTE_REG_FLAG)
             else:
-                mc.MOV_rs(eax.value, WORD)
-            mc.TEST8(addr_add_const(self.SEGMENT_GC, eax,
-                                    descr.jit_wb_if_flag_byteofs),
-                     imm(-0x80))
+                if IS_X86_32:
+                    mc.MOV_rs(eax.value, 3*WORD)
+                else:
+                    mc.MOV_rs(eax.value, WORD)
+                mc.TEST8(addr_add_const(self.SEGMENT_GC, eax,
+                                        descr.jit_wb_if_flag_byteofs),
+                         imm(-0x80))
         #
 
         if not for_frame:
@@ -2218,15 +2222,17 @@
             cls = self.cpu.gc_ll_descr.has_write_barrier_class()
             assert cls is not None and isinstance(descr, cls)
         #
+        stm = self.cpu.gc_ll_descr.stm
         card_marking = False
         mask = descr.jit_wb_if_flag_singlebyte
         if array and descr.jit_wb_cards_set != 0:
-            # assumptions the rest of the function depends on:
-            assert (descr.jit_wb_cards_set_byteofs ==
-                    descr.jit_wb_if_flag_byteofs)
-            assert descr.jit_wb_cards_set_singlebyte == -0x80
+            if not stm:
+                # assumptions the rest of the function depends on:
+                assert (descr.jit_wb_cards_set_byteofs ==
+                        descr.jit_wb_if_flag_byteofs)
+                assert descr.jit_wb_cards_set_singlebyte == -0x80
+                mask = descr.jit_wb_if_flag_singlebyte | -0x80
             card_marking = True
-            mask = descr.jit_wb_if_flag_singlebyte | -0x80
         #
         loc_base = arglocs[0]
         if is_frame:
@@ -2242,10 +2248,18 @@
         # for cond_call_gc_wb_array, also add another fast path:
         # if GCFLAG_CARDS_SET, then we can just set one bit and be done
         if card_marking:
-            # GCFLAG_CARDS_SET is in this byte at 0x80, so this fact can
-            # been checked by the status flags of the previous TEST8
-            mc.J_il8(rx86.Conditions['S'], 0) # patched later
-            js_location = mc.get_relative_pos()
+            if stm:
+                loc2 = addr_add_const(self.SEGMENT_GC, loc_base,
+                                      descr.jit_wb_cards_set_byteofs)
+                mask2 = descr.jit_wb_cards_set_singlebyte
+                mc.TEST8(loc2, imm(mask2))
+                mc.J_il8(rx86.Conditions['NZ'], 0) # patched later
+                js_location = mc.get_relative_pos()
+            else:
+                # GCFLAG_CARDS_SET is in this byte at 0x80, so this fact can
+                # been checked by the status flags of the previous TEST8
+                mc.J_il8(rx86.Conditions['S'], 0) # patched later
+                js_location = mc.get_relative_pos()
         else:
             js_location = 0
 
@@ -2266,7 +2280,7 @@
         #
         if not is_frame:
             mc.PUSH(loc_base)
-            if self.cpu.gc_ll_descr.stm:
+            if stm:
                 # get the num and ref components of the stm_location, and
                 # push them to the stack.  It's 16 bytes, so alignment is
                 # still ok.  The one or three words pushed here are removed
@@ -2286,7 +2300,10 @@
             # The helper ends again with a check of the flag in the object.
             # So here, we can simply write again a 'JNS', which will be
             # taken if GCFLAG_CARDS_SET is still not set.
-            mc.J_il8(rx86.Conditions['NS'], 0) # patched later
+            if stm:
+                mc.J_il8(rx86.Conditions['Z'], 0) # patched later
+            else:
+                mc.J_il8(rx86.Conditions['NS'], 0) # patched later
             jns_location = mc.get_relative_pos()
             #
             # patch the JS above
@@ -2297,7 +2314,56 @@
             # case GCFLAG_CARDS_SET: emit a few instructions to do
             # directly the card flag setting
             loc_index = arglocs[1]
-            if isinstance(loc_index, RegLoc):
+
+            if stm:
+                # must write the value CARD_MARKED into the byte at:
+                #     write_locks_base + (object >> 4) + (index / CARD_SIZE)
+                #
+                write_locks_base = rstm.adr__stm_write_slowpath_card_extra_base
+                if rstm.CARD_SIZE == 32:
+                    card_bits = 5
+                elif rstm.CARD_SIZE == 64:
+                    card_bits = 6
+                elif rstm.CARD_SIZE == 128:
+                    card_bits = 7
+                else:
+                    raise AssertionError("CARD_SIZE should be 32/64/128")
+                #
+                # idea:  mov r11, loc_base    # the object
+                #        and r11, ~15         # align
+                #        lea r11, [loc_index + r11<<(card_bits-4)]
+                #        shr r11, card_bits
+                #        mov [r11 + write_locks_base], card_marked
+                r11 = X86_64_SCRATCH_REG
+                if isinstance(loc_index, RegLoc):
+                    if isinstance(loc_base, RegLoc):
+                        mc.MOV_rr(r11.value, loc_base.value)
+                        mc.AND_ri(r11.value, ~15)
+                    else:
+                        assert isinstance(loc_base, ImmedLoc)
+                        mc.MOV_ri(r11.value, loc_base.value & ~15)  # 32/64bit
+                    mc.LEA_ra(r11.value, (self.SEGMENT_NO,
+                                          loc_index.value,
+                                          r11.value,
+                                          card_bits - 4,
+                                          0))
+                    mc.SHR_ri(r11.value, card_bits)
+                else:
+                    # XXX these cases could be slightly more optimized
+                    assert isinstance(loc_index, ImmedLoc)
+                    cardindex = loc_index.value >> card_bits
+                    if isinstance(loc_base, RegLoc):
+                        mc.MOV_ri(r11.value, cardindex << 4)     # 32/64bit
+                        mc.ADD_rr(r11.value, loc_base.value)
+                        mc.SHR_ri(r11.value, 4)
+                    else:
+                        mc.MOV_ri(r11.value, cardindex + (loc_base.value >> 4))
+                #
+                assert rx86.fits_in_32bits(write_locks_base), "XXX"
+                mc.MOV8_mi((self.SEGMENT_NO, r11.value, write_locks_base),
+                           rstm.CARD_MARKED)
+
+            elif isinstance(loc_index, RegLoc):
                 if IS_X86_64 and isinstance(loc_base, RegLoc):
                     # copy loc_index into r11
                     tmp1 = X86_64_SCRATCH_REG
diff --git a/rpython/memory/gc/stmgc.py b/rpython/memory/gc/stmgc.py
--- a/rpython/memory/gc/stmgc.py
+++ b/rpython/memory/gc/stmgc.py
@@ -34,6 +34,7 @@
     VISIT_FPTR = lltype.Ptr(lltype.FuncType([llmemory.Address], lltype.Void))
 
     JIT_WB_IF_FLAG = 0x01            # value of _STM_GCFLAG_WRITE_BARRIER
+    JIT_WB_CARDS_SET = 0x08          # value of _STM_GCFLAG_CARDS_SET
     stm_fast_alloc = 66*1024         # value of _STM_FAST_ALLOC in stmgc.h
     minimal_size_in_nursery = 16     # hard-coded lower limit
 
diff --git a/rpython/memory/gctransform/stmframework.py b/rpython/memory/gctransform/stmframework.py
--- a/rpython/memory/gctransform/stmframework.py
+++ b/rpython/memory/gctransform/stmframework.py
@@ -141,6 +141,12 @@
             lltype.Signed, rstm.adr_write_slowpath)
         hop.genop("cast_int_to_ptr", [c_write_slowpath], resultvar=op.result)
 
+    def gct_get_write_barrier_from_array_failing_case(self, hop):
+        op = hop.spaceop
+        c_write_slowpath = rmodel.inputconst(
+            lltype.Signed, rstm.adr_write_slowpath_card_extra)
+        hop.genop("cast_int_to_ptr", [c_write_slowpath], resultvar=op.result)
+
     def gct_gc_can_move(self, hop):
         hop.rename('stm_can_move')
 
diff --git a/rpython/rlib/rstm.py b/rpython/rlib/rstm.py
--- a/rpython/rlib/rstm.py
+++ b/rpython/rlib/rstm.py
@@ -25,6 +25,12 @@
 adr_segment_base = (
     CFlexSymbolic('((long)&STM_SEGMENT->segment_base)'))
 adr_write_slowpath = CFlexSymbolic('((long)&_stm_write_slowpath)')
+adr_write_slowpath_card_extra = (
+    CFlexSymbolic('((long)&_stm_write_slowpath_card_extra)'))
+adr__stm_write_slowpath_card_extra_base = (
+    CFlexSymbolic('((long)&_stm_write_slowpath_card_extra_base)'))
+CARD_MARKED = CFlexSymbolic('_STM_CARD_MARKED')
+CARD_SIZE   = CFlexSymbolic('_STM_CARD_SIZE')
 
 adr__pypy_stm_become_inevitable = (
     CFlexSymbolic('((long)&_pypy_stm_become_inevitable)'))
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to