Author: Remi Meier <[email protected]>
Branch: stmgc-c4
Changeset: r65685:fd02ae6aa5ba
Date: 2013-07-26 13:45 +0200
http://bitbucket.org/pypy/pypy/changeset/fd02ae6aa5ba/

Log:    improve asm of fastpath in stm barriers

diff --git a/rpython/jit/backend/llsupport/gc.py b/rpython/jit/backend/llsupport/gc.py
--- a/rpython/jit/backend/llsupport/gc.py
+++ b/rpython/jit/backend/llsupport/gc.py
@@ -441,14 +441,14 @@
     def __init__(self, gc_ll_descr, stmcat):
         assert stmcat == 'P2R'
         STMBarrierDescr.__init__(self, gc_ll_descr, stmcat,
-                                 'stm_read_barrier')
+                                 'stm_DirectReadBarrier')
 
         
 class STMWriteBarrierDescr(STMBarrierDescr):
     def __init__(self, gc_ll_descr, stmcat):
         assert stmcat in ['P2W']
         STMBarrierDescr.__init__(self, gc_ll_descr, stmcat,
-                                 'stm_write_barrier')
+                                 'stm_WriteBarrier')
     
         
 class GcLLDescr_framework(GcLLDescription):
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -2204,6 +2204,7 @@
         assert isinstance(descr, STMBarrierDescr)
         assert descr.returns_modified_object
         loc_base = arglocs[0]
+        temp_loc = arglocs[1]
         assert isinstance(loc_base, RegLoc)
         
         helper_num = 0
@@ -2242,32 +2243,22 @@
             mc.J_il8(rx86.Conditions['NZ'], 0) # patched below
             jnz_location = mc.get_relative_pos()
 
+        # FXCACHE_AT(obj) != obj
         if isinstance(descr, STMReadBarrierDescr):
-            # FXCACHE_AT(obj) != obj
-            # XXX: optimize...
-            temp = loc_base.find_unused_reg()
-            mc.PUSH_r(temp.value)
-            mc.MOV_rr(temp.value, loc_base.value)
-            mc.AND_ri(temp.value, StmGC.FX_MASK)
-
-            # XXX: addressings like [rdx+rax*1] don't seem to work
+            # calculate: temp = obj & FX_MASK
+            assert StmGC.FX_MASK == 65535
+            mc.MOVZX16(temp_loc, loc_base)
+            # calculate: rbc + temp == obj
             rbc = self._get_stm_read_barrier_cache_addr()
             stmtlocal.tl_segment_prefix(mc)
             mc.MOV_rj(X86_64_SCRATCH_REG.value, rbc)
-            mc.ADD_rr(X86_64_SCRATCH_REG.value, temp.value)
-            mc.CMP(loc_base, mem(X86_64_SCRATCH_REG, 0))
-            mc.POP_r(temp.value)
+            mc.CMP_ra(loc_base.value, 
+                      (X86_64_SCRATCH_REG.value, temp_loc.value, 0, 0))
             mc.J_il8(rx86.Conditions['Z'], 0) # patched below
             jz_location2 = mc.get_relative_pos()
-            # <stm_read_barrier+21>:   mov    rdx,0xffffffffffffffb0
-            # <stm_read_barrier+28>:   movzx  eax,di
-            # <stm_read_barrier+31>:   mov    rdx,QWORD PTR fs:[rdx]
-            # <stm_read_barrier+35>:   cmp    rdi,QWORD PTR [rdx+rax*1]
-            # <stm_read_barrier+39>:   je     0x401f61 <stm_read_barrier+17>
-            # <stm_read_barrier+41>:   jmp    0x6a59f0 <stm_DirectReadBarrier>
-        
+
+        # obj->h_tid & GCFLAG_WRITE_BARRIER) != 0
         if isinstance(descr, STMWriteBarrierDescr):
-            # obj->h_tid & GCFLAG_WRITE_BARRIER) != 0
             if loc_base == ebp:
                 #mc.MOV_rb(X86_64_SCRATCH_REG.value, StmGC.H_TID)
                 mc.TEST8_bi(StmGC.H_TID, StmGC.GCFLAG_WRITE_BARRIER)
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -797,8 +797,25 @@
                    for i in range(N)]
         self.perform_discard(op, arglocs)
 
+    def consider_cond_call_stm_b(self, op):
+        assert op.result is None
+        args = op.getarglist()
+        N = len(args)
+        assert N == 1
+        # we force all arguments in a reg (unless they are Consts),
+        # because it will be needed anyway by the following setfield_gc
+        # or setarrayitem_gc. It avoids loading it twice from the memory.
+        tmp_box = TempBox()
+        tmp_loc = self.rm.force_allocate_reg(tmp_box, args)
+        args = args + [tmp_box]
+
+        arglocs = [self.rm.make_sure_var_in_reg(op.getarg(i), args)
+                   for i in range(N)] + [tmp_loc]
+
+        self.perform_discard(op, arglocs)
+        self.rm.possibly_free_var(tmp_box)
+        
     consider_cond_call_gc_wb_array = consider_cond_call_gc_wb
-    consider_cond_call_stm_b       = consider_cond_call_gc_wb
 
     def consider_call_malloc_nursery(self, op):
         gc_ll_descr = self.assembler.cpu.gc_ll_descr
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to