Author: Remi Meier <[email protected]>
Branch: stmgc-c4
Changeset: r67490:2da83847b301
Date: 2013-10-21 14:16 +0200
http://bitbucket.org/pypy/pypy/changeset/2da83847b301/

Log:    use new thread-locals for better nursery-fastpath

diff --git a/rpython/jit/backend/llsupport/rewrite.py b/rpython/jit/backend/llsupport/rewrite.py
--- a/rpython/jit/backend/llsupport/rewrite.py
+++ b/rpython/jit/backend/llsupport/rewrite.py
@@ -161,7 +161,7 @@
     def gen_malloc_frame(self, frame_info, frame):
         size_box = history.BoxInt()
         descrs = self.gc_ll_descr.getframedescrs(self.cpu)
-        if self.gc_ll_descr.kind == 'boehm' or self.gc_ll_descr.stm:
+        if self.gc_ll_descr.kind == 'boehm':
             op0 = ResOperation(rop.GETFIELD_GC, [history.ConstInt(frame_info)],
                                size_box,
                                descr=descrs.jfi_frame_depth)
@@ -171,7 +171,6 @@
             self.handle_new_array(descrs.arraydescr, op1)
         else:
             # we read size in bytes here, not the length
-            # (this path is only used in non-STM mode)
             op0 = ResOperation(rop.GETFIELD_GC, [history.ConstInt(frame_info)],
                                size_box,
                                descr=descrs.jfi_frame_size)
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -259,10 +259,9 @@
         self._pop_all_regs_from_frame(mc, [eax, edi], self.cpu.supports_floats)
         if self.cpu.gc_ll_descr.stm:
             # load nursery_current into EDI
-            self._load_stm_thread_descriptor(mc, X86_64_SCRATCH_REG)
-            mc.MOV_rm(edi.value, 
-                      (X86_64_SCRATCH_REG.value, 
-                       StmGC.TD_NURSERY_CURRENT))
+            nc = self._get_stm_tl(rstm.get_nursery_current_adr())
+            self._tl_segment_if_stm(mc)
+            mc.MOV_rj(edi.value, nc)
         else:
             nursery_free_adr = self.cpu.gc_ll_descr.get_nursery_free_addr()
             mc.MOV(edi, heap(nursery_free_adr))   # load this in EDI
@@ -2755,25 +2754,16 @@
         # XXX if the next operation is a GUARD_NO_EXCEPTION, we should
         # somehow jump over it too in the fast path
 
-    def _load_stm_thread_descriptor(self, mc, loc):
-        assert self.cpu.gc_ll_descr.stm
-        assert isinstance(loc, RegLoc)
-        
-        td = self._get_stm_tl(rstm.get_thread_descriptor_adr())
-        self._tl_segment_if_stm(mc)
-        mc.MOV(loc, heap(td))
-        mc.MOV_rm(loc.value, (loc.value, 0))
-
+    
     def _cond_allocate_in_nursery_or_slowpath(self, mc, gcmap):
         # needed for slowpath:
         # eax = nursery_current
         # edi = nursery_current + size
-        # needed here:
-        # X86_64_SCRATCH_REG = thread_descriptor
         #
         # cmp nursery_current+size > nursery_nextlimit
-        mc.CMP_rm(edi.value, (X86_64_SCRATCH_REG.value, 
-                              StmGC.TD_NURSERY_NEXTLIMIT))
+        nnl = self._get_stm_tl(rstm.get_nursery_nextlimit_adr())
+        self._tl_segment_if_stm(mc)
+        mc.CMP_rj(edi.value, nnl)
         mc.J_il8(rx86.Conditions['NA'], 0) # patched later
         jmp_adr = mc.get_relative_pos()
         #
@@ -2781,7 +2771,7 @@
         # save the gcmap
         self.push_gcmap(mc, gcmap, mov=True)
         mc.CALL(imm(self.malloc_slowpath))
-        mc.JMP_l8(0)
+        mc.JMP_l8(0) # XXX: is JMP over 1 instr good?
         jmp2_adr = mc.get_relative_pos()
         #
         # == FASTPATH ==
@@ -2789,10 +2779,10 @@
         assert 0 < offset <= 127
         mc.overwrite(jmp_adr-1, chr(offset))
         #
-        # thread_descriptor->nursery_current = nursery_current+size
-        mc.MOV_mr((X86_64_SCRATCH_REG.value,
-                   StmGC.TD_NURSERY_CURRENT),
-                   edi.value)
+        # stm_nursery_current = stm_nursery_current+size
+        nc = self._get_stm_tl(rstm.get_nursery_current_adr())
+        self._tl_segment_if_stm(mc)
+        mc.MOV_jr(nc, edi.value)
         #
         # END
         offset = mc.get_relative_pos() - jmp2_adr
@@ -2804,10 +2794,10 @@
         assert size & (WORD-1) == 0     # must be correctly aligned
         mc = self.mc
         # load nursery_current and nursery_nextlimit
-        self._load_stm_thread_descriptor(mc, X86_64_SCRATCH_REG)
-        mc.MOV_rm(eax.value, 
-                  (X86_64_SCRATCH_REG.value,
-                   StmGC.TD_NURSERY_CURRENT))
+        nc = self._get_stm_tl(rstm.get_nursery_current_adr())
+        self._tl_segment_if_stm(mc)
+        mc.MOV_rj(eax.value, nc)
+        #
         mc.LEA_rm(edi.value, (eax.value, size))
         #
         # eax=nursery_current, edi=nursery_current+size
@@ -2816,12 +2806,14 @@
     def malloc_cond_varsize_frame_stm(self, sizeloc, gcmap):
         assert self.cpu.gc_ll_descr.stm
         mc = self.mc
-        self._load_stm_thread_descriptor(mc, X86_64_SCRATCH_REG)
         if sizeloc is eax:
             self.mc.MOV(edi, sizeloc)
             sizeloc = edi
-        self.mc.MOV_rm(eax.value, (X86_64_SCRATCH_REG.value, 
-                                   StmGC.TD_NURSERY_CURRENT))
+
+        nc = self._get_stm_tl(rstm.get_nursery_current_adr())
+        self._tl_segment_if_stm(mc)
+        mc.MOV_rj(eax.value, nc)
+        
         if sizeloc is edi:
             self.mc.ADD_rr(edi.value, eax.value)
         else:
@@ -2837,6 +2829,9 @@
         assert isinstance(arraydescr, ArrayDescr)
 
         mc = self.mc
+        nc = self._get_stm_tl(rstm.get_nursery_current_adr())
+        nnl = self._get_stm_tl(rstm.get_nursery_nextlimit_adr())
+            
         # lengthloc is the length of the array, which we must not modify!
         assert lengthloc is not eax and lengthloc is not edi
         if isinstance(lengthloc, RegLoc):
@@ -2849,10 +2844,8 @@
         mc.J_il8(rx86.Conditions['A'], 0) # patched later
         jmp_adr0 = mc.get_relative_pos()
 
-        self._load_stm_thread_descriptor(mc, X86_64_SCRATCH_REG)
-        mc.MOV_rm(eax.value, 
-                  (X86_64_SCRATCH_REG.value, 
-                   StmGC.TD_NURSERY_CURRENT))
+        self._tl_segment_if_stm(mc)
+        mc.MOV_rj(eax.value, nc)
 
         if valid_addressing_size(itemsize):
             shift = get_scale(itemsize)
@@ -2873,8 +2866,8 @@
             mc.AND_ri(edi.value, ~(WORD - 1))
         # now edi contains the total size in bytes, rounded up to a multiple
         # of WORD, plus nursery_free_adr
-        mc.CMP_rm(edi.value, (X86_64_SCRATCH_REG.value, 
-                              StmGC.TD_NURSERY_NEXTLIMIT))
+        self._tl_segment_if_stm(mc)
+        mc.CMP_rj(edi.value, nnl)
         mc.J_il8(rx86.Conditions['NA'], 0) # patched later
         jmp_adr1 = mc.get_relative_pos()
         #
@@ -2905,10 +2898,9 @@
         assert 0 < offset <= 127
         mc.overwrite(jmp_adr1-1, chr(offset))
         #
-        # set thread_descriptor->nursery_current
-        mc.MOV_mr((X86_64_SCRATCH_REG.value,
-                   StmGC.TD_NURSERY_CURRENT),
-                   edi.value)
+        # set stm_nursery_current
+        self._tl_segment_if_stm(mc)
+        mc.MOV_jr(nc, edi.value)
         #
         # write down the tid
         mc.MOV(mem(eax, 0), imm(arraydescr.tid))
diff --git a/rpython/memory/gc/stmgc.py b/rpython/memory/gc/stmgc.py
--- a/rpython/memory/gc/stmgc.py
+++ b/rpython/memory/gc/stmgc.py
@@ -37,10 +37,6 @@
     malloc_zero_filled = True
     #gcflag_extra = GCFLAG_EXTRA
 
-    # SYNC with et.h
-    TD_NURSERY_CURRENT = 80
-    TD_NURSERY_NEXTLIMIT = 88
-    
     GCHDR = lltype.Struct(
         'GCPTR',
         ('h_tid', lltype.Unsigned),
diff --git a/rpython/rlib/rstm.py b/rpython/rlib/rstm.py
--- a/rpython/rlib/rstm.py
+++ b/rpython/rlib/rstm.py
@@ -5,8 +5,18 @@
 from rpython.rlib.jit import dont_look_inside
 
 @dont_look_inside
-def get_thread_descriptor_adr():
-    addr = llop.stm_get_adr_of_thread_descriptor(llmemory.Address)
+def get_nursery_current_adr():
+    addr = llop.stm_get_adr_of_nursery_current(llmemory.Address)
+    return rffi.cast(lltype.Signed, addr)
+
+@dont_look_inside
+def get_nursery_nextlimit_adr():
+    addr = llop.stm_get_adr_of_nursery_nextlimit(llmemory.Address)
+    return rffi.cast(lltype.Signed, addr)
+
+@dont_look_inside
+def get_active_adr():
+    addr = llop.stm_get_adr_of_active(llmemory.Address)
     return rffi.cast(lltype.Signed, addr)
 
 @dont_look_inside
diff --git a/rpython/rtyper/llinterp.py b/rpython/rtyper/llinterp.py
--- a/rpython/rtyper/llinterp.py
+++ b/rpython/rtyper/llinterp.py
@@ -951,7 +951,9 @@
     op_stm_barrier = _stm_not_implemented
     op_stm_push_root = _stm_not_implemented
     op_stm_pop_root_into = _stm_not_implemented
-    op_stm_get_adr_of_thread_descriptor = _stm_not_implemented
+    op_stm_get_adr_of_nursery_current = _stm_not_implemented
+    op_stm_get_adr_of_nursery_nextlimit = _stm_not_implemented
+    op_stm_get_adr_of_active = _stm_not_implemented
     op_stm_get_adr_of_read_barrier_cache = _stm_not_implemented
     op_stm_get_adr_of_private_rev_num = _stm_not_implemented
     op_stm_enter_callback_call = _stm_not_implemented
diff --git a/rpython/rtyper/lltypesystem/lloperation.py b/rpython/rtyper/lltypesystem/lloperation.py
--- a/rpython/rtyper/lltypesystem/lloperation.py
+++ b/rpython/rtyper/lltypesystem/lloperation.py
@@ -448,7 +448,9 @@
 
     'stm_get_adr_of_private_rev_num':LLOp(),
     'stm_get_adr_of_read_barrier_cache':LLOp(),
-    'stm_get_adr_of_thread_descriptor': LLOp(),
+    'stm_get_adr_of_nursery_current': LLOp(),
+    'stm_get_adr_of_nursery_nextlimit': LLOp(),
+    'stm_get_adr_of_active': LLOp(),
 
     'stm_ignored_start':      LLOp(canrun=True),
     'stm_ignored_stop':       LLOp(canrun=True),
diff --git a/rpython/translator/c/funcgen.py b/rpython/translator/c/funcgen.py
--- a/rpython/translator/c/funcgen.py
+++ b/rpython/translator/c/funcgen.py
@@ -590,7 +590,9 @@
     OP_STM_PTR_EQ                       = _OP_STM
     OP_STM_PUSH_ROOT                    = _OP_STM
     OP_STM_POP_ROOT_INTO                = _OP_STM
-    OP_STM_GET_ADR_OF_THREAD_DESCRIPTOR = _OP_STM
+    OP_STM_GET_ADR_OF_NURSERY_CURRENT   = _OP_STM
+    OP_STM_GET_ADR_OF_NURSERY_NEXTLIMIT = _OP_STM
+    OP_STM_GET_ADR_OF_ACTIVE            = _OP_STM
     OP_STM_GET_ROOT_STACK_TOP           = _OP_STM
     OP_STM_GET_ADR_OF_PRIVATE_REV_NUM   = _OP_STM
     OP_STM_GET_ADR_OF_READ_BARRIER_CACHE= _OP_STM
diff --git a/rpython/translator/stm/funcgen.py b/rpython/translator/stm/funcgen.py
--- a/rpython/translator/stm/funcgen.py
+++ b/rpython/translator/stm/funcgen.py
@@ -114,9 +114,19 @@
     return '%s = (%s)stm_pop_root();' % (
         arg0, cdecl(funcgen.lltypename(op.args[0]), ''))
 
-def stm_get_adr_of_thread_descriptor(funcgen, op):
+def stm_get_adr_of_nursery_current(funcgen, op):
     result = funcgen.expr(op.result)
-    return '%s = (%s)&thread_descriptor;' % (
+    return '%s = (%s)&stm_nursery_current;' % (
+        result, cdecl(funcgen.lltypename(op.result), ''))
+
+def stm_get_adr_of_nursery_nextlimit(funcgen, op):
+    result = funcgen.expr(op.result)
+    return '%s = (%s)&stm_nursery_nextlimit;' % (
+        result, cdecl(funcgen.lltypename(op.result), ''))
+
+def stm_get_adr_of_active(funcgen, op):
+    result = funcgen.expr(op.result)
+    return '%s = (%s)&stm_active;' % (
         result, cdecl(funcgen.lltypename(op.result), ''))
     
 def stm_get_root_stack_top(funcgen, op):
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to