Author: Remi Meier <[email protected]>
Branch: stmgc-c4
Changeset: r65682:f819be0d01ca
Date: 2013-07-26 11:20 +0200
http://bitbucket.org/pypy/pypy/changeset/f819be0d01ca/

Log:    Add fastpaths for read/write barriers for x64. The asm in the
        fastpath is still seriously suboptimal.
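
For reference, here is a minimal Python sketch of the two fastpath
conditions that the new assembly encodes. The names private_rev_num,
read_barrier_cache and Obj below are illustrative stand-ins for stmgc's
thread-local values and object header fields; they are not part of the
patch, and the real checks are emitted as machine code in
_stm_barrier_fastpath().

    # Illustrative sketch only; assumes LONG_BIT == 64.
    FX_MASK = 65535                        # same value as StmGC.FX_MASK
    GCFLAG_WRITE_BARRIER = 1 << 37         # first_gcflag << 5 on 64-bit

    class Obj(object):
        def __init__(self, addr, h_revision, h_tid):
            self.addr = addr               # object address, used as cache key
            self.h_revision = h_revision
            self.h_tid = h_tid

    private_rev_num = 1                    # per-thread; %fs-relative in the asm
    read_barrier_cache = {}                # per-thread; indexed by addr & FX_MASK

    def write_barrier_needs_slowpath(obj):
        # take the slow path if the object is not private to this
        # transaction, or if it still carries GCFLAG_WRITE_BARRIER
        return (obj.h_revision != private_rev_num
                or (obj.h_tid & GCFLAG_WRITE_BARRIER) != 0)

    def read_barrier_needs_slowpath(obj):
        # take the slow path only if the object is not private *and*
        # FXCACHE_AT(obj), the cache slot indexed by obj.addr & FX_MASK,
        # does not already hold obj
        return (obj.h_revision != private_rev_num
                and read_barrier_cache.get(obj.addr & FX_MASK) is not obj)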

diff --git a/rpython/jit/backend/x86/assembler.py 
b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -2182,12 +2182,19 @@
         assert isinstance(result_loc, RegLoc)
         mc.POP_r(result_loc.value)
         
-    def _get_private_rev_num_addr(self):
+    def _get_stm_private_rev_num_addr(self):
         assert self.cpu.gc_ll_descr.stm
         rn = rstm.get_adr_of_private_rev_num()
         rn = rn - stmtlocal.threadlocal_base()
         assert rx86.fits_in_32bits(rn)
         return rn
+
+    def _get_stm_read_barrier_cache_addr(self):
+        assert self.cpu.gc_ll_descr.stm
+        rbc = rstm.get_adr_of_read_barrier_cache()
+        rbc = rbc - stmtlocal.threadlocal_base()
+        assert rx86.fits_in_32bits(rbc)
+        return rbc
         
     def _stm_barrier_fastpath(self, mc, descr, arglocs, is_frame=False,
                               align_stack=False):
@@ -2207,18 +2214,82 @@
         #
         # FASTPATH:
         #
-        rn = self._get_private_rev_num_addr()
+        # write_barrier:
+        # (obj->h_revision != stm_private_rev_num)
+        #     || ((obj->h_tid & GCFLAG_WRITE_BARRIER) != 0)
+        # read_barrier:
+        # (obj->h_revision != stm_private_rev_num)
+        #     && (FXCACHE_AT(obj) != obj)
+        assert not IS_X86_32 # XXX: todo
+        jz_location = 0
+        jz_location2 = 0
+        jnz_location = 0
+        # compare h_revision with stm_private_rev_num (XXX: may be slow)
+        rn = self._get_stm_private_rev_num_addr()
+        stmtlocal.tl_segment_prefix(mc)
+        mc.MOV_rj(X86_64_SCRATCH_REG.value, rn)
+        if loc_base == ebp:
+            mc.CMP_rb(X86_64_SCRATCH_REG.value, StmGC.H_REVISION)
+        else:
+            mc.CMP(X86_64_SCRATCH_REG, mem(loc_base, StmGC.H_REVISION))
+            
         if isinstance(descr, STMReadBarrierDescr):
-            # (obj->h_revision != stm_private_rev_num)
-            #      && (FXCACHE_AT(obj) != obj)))
-            stmtlocal.tl_segment_prefix(mc)
-            #mc.CMP_jr(rn, loc_base.value)
-            mc.MOV_rj(X86_64_SCRATCH_REG.value, rn)
-            mc.CMP(X86_64_SCRATCH_REG, mem(loc_base, StmGC.H_REVISION))
+            # jump to end if h_rev==priv_rev
             mc.J_il8(rx86.Conditions['Z'], 0) # patched below
             jz_location = mc.get_relative_pos()
-        else:
-            jz_location = 0
+        else: # write_barrier
+            # jump to slowpath if h_rev!=priv_rev
+            mc.J_il8(rx86.Conditions['NZ'], 0) # patched below
+            jnz_location = mc.get_relative_pos()
+
+        if isinstance(descr, STMReadBarrierDescr):
+            # FXCACHE_AT(obj) != obj
+            # XXX: optimize...
+            temp = loc_base.find_unused_reg()
+            mc.PUSH_r(temp.value)
+            mc.MOV_rr(temp.value, loc_base.value)
+            mc.AND_ri(temp.value, StmGC.FX_MASK)
+
+            # XXX: addressing modes like [rdx+rax*1] don't seem to work
+            rbc = self._get_stm_read_barrier_cache_addr()
+            stmtlocal.tl_segment_prefix(mc)
+            mc.MOV_rj(X86_64_SCRATCH_REG.value, rbc)
+            mc.ADD_rr(X86_64_SCRATCH_REG.value, temp.value)
+            mc.CMP(loc_base, mem(X86_64_SCRATCH_REG, 0))
+            mc.POP_r(temp.value)
+            mc.J_il8(rx86.Conditions['Z'], 0) # patched below
+            jz_location2 = mc.get_relative_pos()
+            # <stm_read_barrier+21>:   mov    rdx,0xffffffffffffffb0
+            # <stm_read_barrier+28>:   movzx  eax,di
+            # <stm_read_barrier+31>:   mov    rdx,QWORD PTR fs:[rdx]
+            # <stm_read_barrier+35>:   cmp    rdi,QWORD PTR [rdx+rax*1]
+            # <stm_read_barrier+39>:   je     0x401f61 <stm_read_barrier+17>
+            # <stm_read_barrier+41>:   jmp    0x6a59f0 <stm_DirectReadBarrier>
+        
+        if isinstance(descr, STMWriteBarrierDescr):
+            # (obj->h_tid & GCFLAG_WRITE_BARRIER) != 0
+            if loc_base == ebp:
+                #mc.MOV_rb(X86_64_SCRATCH_REG.value, StmGC.H_TID)
+                mc.TEST8_bi(StmGC.H_TID, StmGC.GCFLAG_WRITE_BARRIER)
+            else:
+                # mc.MOV(X86_64_SCRATCH_REG, mem(loc_base, StmGC.H_TID))
+                mc.TEST8_mi((loc_base.value, StmGC.H_TID),
+                            StmGC.GCFLAG_WRITE_BARRIER)
+            #doesn't work:
+            # mc.TEST(X86_64_SCRATCH_REG, imm(StmGC.GCFLAG_WRITE_BARRIER))
+            mc.J_il8(rx86.Conditions['NZ'], 0) # patched below
+            jnz_location2 = mc.get_relative_pos()
+            
+            # jump to end
+            mc.JMP_l8(0) # patched below
+            jz_location = mc.get_relative_pos()
+            
+            # jump target slowpath:
+            offset = mc.get_relative_pos() - jnz_location
+            offset2 = mc.get_relative_pos() - jnz_location2
+            assert 0 < offset <= 127
+            mc.overwrite(jnz_location - 1, chr(offset))
+            mc.overwrite(jnz_location2 - 1, chr(offset2))
         #
         # SLOWPATH_START
         #
@@ -2243,10 +2314,14 @@
         #
         # SLOWPATH_END
         #
+        # jump target end:
+        offset = mc.get_relative_pos() - jz_location
+        assert 0 < offset <= 127
+        mc.overwrite(jz_location - 1, chr(offset))
         if isinstance(descr, STMReadBarrierDescr):
-            offset = mc.get_relative_pos() - jz_location
+            offset = mc.get_relative_pos() - jz_location2
             assert 0 < offset <= 127
-            mc.overwrite(jz_location - 1, chr(offset))
+            mc.overwrite(jz_location2 - 1, chr(offset))
 
 
         
diff --git a/rpython/memory/gc/stmgc.py b/rpython/memory/gc/stmgc.py
--- a/rpython/memory/gc/stmgc.py
+++ b/rpython/memory/gc/stmgc.py
@@ -12,23 +12,7 @@
 
 WORD = LONG_BIT // 8
 NULL = llmemory.NULL
-
-# keep in sync with stmgc.h & et.h:
 first_gcflag = 1 << (LONG_BIT//2)
-GCFLAG_OLD                    = first_gcflag << 0
-GCFLAG_VISITED                = first_gcflag << 1
-GCFLAG_PUBLIC                 = first_gcflag << 2
-GCFLAG_PREBUILT_ORIGINAL      = first_gcflag << 3
-GCFLAG_PUBLIC_TO_PRIVATE      = first_gcflag << 4
-GCFLAG_WRITE_BARRIER          = first_gcflag << 5 # stmgc.h
-GCFLAG_NURSERY_MOVED          = first_gcflag << 6
-GCFLAG_BACKUP_COPY            = first_gcflag << 7 # debug
-GCFLAG_STUB                   = first_gcflag << 8 # debug
-GCFLAG_PRIVATE_FROM_PROTECTED = first_gcflag << 9
-GCFLAG_HAS_ID                 = first_gcflag << 10
-
-PREBUILT_FLAGS    = first_gcflag * (1 + 2 + 4 + 8)
-PREBUILT_REVISION = r_uint(1)
 
 
 class StmGC(MovingGCBase):
@@ -53,6 +37,27 @@
     TRANSLATION_PARAMS = {
     }
 
+    # keep in sync with stmgc.h & et.h:
+    GCFLAG_OLD                    = first_gcflag << 0
+    GCFLAG_VISITED                = first_gcflag << 1
+    GCFLAG_PUBLIC                 = first_gcflag << 2
+    GCFLAG_PREBUILT_ORIGINAL      = first_gcflag << 3
+    GCFLAG_PUBLIC_TO_PRIVATE      = first_gcflag << 4
+    GCFLAG_WRITE_BARRIER          = first_gcflag << 5 # stmgc.h
+    GCFLAG_NURSERY_MOVED          = first_gcflag << 6
+    GCFLAG_BACKUP_COPY            = first_gcflag << 7 # debug
+    GCFLAG_STUB                   = first_gcflag << 8 # debug
+    GCFLAG_PRIVATE_FROM_PROTECTED = first_gcflag << 9
+    GCFLAG_HAS_ID                 = first_gcflag << 10
+    GCFLAG_IMMUTABLE              = first_gcflag << 11
+    GCFLAG_SMALLSTUB              = first_gcflag << 12
+    
+    PREBUILT_FLAGS    = first_gcflag * (1 + 2 + 4 + 8)
+    PREBUILT_REVISION = r_uint(1)
+    
+    FX_MASK = 65535
+
+
     def setup(self):
         # Hack: MovingGCBase.setup() sets up stuff related to id(), which
         # we implement differently anyway.  So directly call GCBase.setup().
@@ -75,7 +80,7 @@
 
     def get_original_copy(self, obj):
         addr = llmemory.cast_ptr_to_adr(obj)
-        if bool(self.get_hdr_tid(addr)[0] & GCFLAG_PREBUILT_ORIGINAL):
+        if bool(self.get_hdr_tid(addr)[0] & self.GCFLAG_PREBUILT_ORIGINAL):
             return obj
         #
         orig = self.get_hdr_original(addr)[0]
@@ -125,7 +130,7 @@
         """Means the reference will stay valid, except if not
         seen by the GC, then it can get collected."""
         tid = self.get_hdr_tid(obj)[0]
-        if bool(tid & GCFLAG_OLD):
+        if bool(tid & self.GCFLAG_OLD):
             return False
         return True
         
diff --git a/rpython/rlib/rstm.py b/rpython/rlib/rstm.py
--- a/rpython/rlib/rstm.py
+++ b/rpython/rlib/rstm.py
@@ -7,6 +7,10 @@
     addr = llop.stm_get_adr_of_private_rev_num(llmemory.Address)
     return rffi.cast(lltype.Signed, addr)
 
+def get_adr_of_read_barrier_cache():
+    addr = llop.stm_get_adr_of_read_barrier_cache(llmemory.Address)
+    return rffi.cast(lltype.Signed, addr)
+
 def become_inevitable():
     llop.stm_become_inevitable(lltype.Void)
 
diff --git a/rpython/rtyper/lltypesystem/lloperation.py 
b/rpython/rtyper/lltypesystem/lloperation.py
--- a/rpython/rtyper/lltypesystem/lloperation.py
+++ b/rpython/rtyper/lltypesystem/lloperation.py
@@ -457,6 +457,7 @@
     'stm_inspect_abort_info': LLOp(sideeffects=False),
 
     'stm_get_adr_of_private_rev_num':LLOp(),
+    'stm_get_adr_of_read_barrier_cache':LLOp(),
     
     # __________ address operations __________
 
diff --git a/rpython/translator/c/funcgen.py b/rpython/translator/c/funcgen.py
--- a/rpython/translator/c/funcgen.py
+++ b/rpython/translator/c/funcgen.py
@@ -593,6 +593,7 @@
     OP_STM_POP_ROOT_INTO                = _OP_STM
     OP_STM_GET_ROOT_STACK_TOP           = _OP_STM
     OP_STM_GET_ADR_OF_PRIVATE_REV_NUM   = _OP_STM
+    OP_STM_GET_ADR_OF_READ_BARRIER_CACHE= _OP_STM
     OP_STM_ALLOCATE                     = _OP_STM
     OP_STM_WEAKREF_ALLOCATE             = _OP_STM
     OP_STM_GET_TID                      = _OP_STM
diff --git a/rpython/translator/stm/funcgen.py 
b/rpython/translator/stm/funcgen.py
--- a/rpython/translator/stm/funcgen.py
+++ b/rpython/translator/stm/funcgen.py
@@ -103,6 +103,11 @@
     result = funcgen.expr(op.result)
     return '%s = (%s)&stm_private_rev_num;' % (
         result, cdecl(funcgen.lltypename(op.result), ''))
+
+def stm_get_adr_of_read_barrier_cache(funcgen, op):
+    result = funcgen.expr(op.result)
+    return '%s = (%s)&stm_read_barrier_cache;' % (
+        result, cdecl(funcgen.lltypename(op.result), ''))
     
     
 def stm_weakref_allocate(funcgen, op):