Author: Armin Rigo <[email protected]>
Branch: stm-thread-2
Changeset: r57764:d0f15e8614e4
Date: 2012-10-03 14:34 +0200
http://bitbucket.org/pypy/pypy/changeset/d0f15e8614e4/
Log: fix fix fix in-progress
diff --git a/pypy/jit/backend/x86/assembler.py
b/pypy/jit/backend/x86/assembler.py
--- a/pypy/jit/backend/x86/assembler.py
+++ b/pypy/jit/backend/x86/assembler.py
@@ -30,7 +30,6 @@
from pypy.rlib.objectmodel import we_are_translated, specialize
from pypy.jit.backend.x86 import rx86, regloc, codebuf
from pypy.jit.metainterp.resoperation import rop, ResOperation
-from pypy.jit.backend.x86.support import values_array
from pypy.jit.backend.x86 import support
from pypy.rlib.debug import (debug_print, debug_start, debug_stop,
have_debug_prints)
@@ -41,6 +40,7 @@
from pypy.jit.codewriter import longlong
from pypy.rlib.rarithmetic import intmask
from pypy.rlib.objectmodel import compute_unique_id
+from pypy.jit.backend.x86 import stmtlocal
# darwin requires the stack to be 16 bytes aligned on calls. Same for gcc
4.5.0,
# better safe than sorry
@@ -68,16 +68,11 @@
_regalloc = None
_output_loop_log = None
- def __init__(self, cpu, translate_support_code=False,
- failargs_limit=1000):
+ def __init__(self, cpu, translate_support_code=False):
self.cpu = cpu
self.verbose = False
self.rtyper = cpu.rtyper
- self.fail_boxes_int = values_array(lltype.Signed, failargs_limit)
- self.fail_boxes_ptr = values_array(llmemory.GCREF, failargs_limit)
- self.fail_boxes_float = values_array(longlong.FLOATSTORAGE,
- failargs_limit)
- self.fail_ebp = 0
+ self.asmtlocals = {}
self.loop_run_counters = []
self.float_const_neg_addr = 0
self.float_const_abs_addr = 0
@@ -87,7 +82,6 @@
self.setup_failure_recovery()
self._debug = False
self.debug_counter_descr = cpu.fielddescrof(DEBUG_COUNTER, 'i')
- self.fail_boxes_count = 0
self.datablockwrapper = None
self.stack_check_slowpath = 0
self.propagate_exception_path = 0
@@ -95,7 +89,7 @@
self.teardown()
def leave_jitted_hook(self):
- ptrs = self.fail_boxes_ptr.ar
+ ptrs = stmtlocal.get_asm_tlocal(self.cpu)
llop.gc_assume_young_pointers(lltype.Void,
llmemory.cast_ptr_to_adr(ptrs))
@@ -140,9 +134,9 @@
self.mc = codebuf.MachineCodeBlockWrapper()
#assert self.datablockwrapper is None --- but obscure case
# possible, e.g. getting MemoryError and continuing
- allblocks = self.get_asmmemmgr_blocks(looptoken)
+ self.allblocks = self.get_asmmemmgr_blocks(looptoken)
self.datablockwrapper = MachineDataBlockWrapper(self.cpu.asmmemmgr,
- allblocks)
+ self.allblocks)
self.target_tokens_currently_compiling = {}
def teardown(self):
@@ -151,6 +145,7 @@
self.pending_memoryerror_trampoline_from = None
self.mc = None
self.current_clt = None
+ self.allblocks = None
def finish_once(self):
if self._debug:
@@ -554,7 +549,7 @@
self.write_pending_failure_recoveries()
full_size = self.mc.get_relative_pos()
#
- rawstart = self.materialize_loop(looptoken)
+ rawstart = self.materialize_loop()
debug_start("jit-backend-addr")
debug_print("Loop %d (%s) has address %x to %x (bootstrap %x)" % (
looptoken.number, loopname,
@@ -613,7 +608,7 @@
self.write_pending_failure_recoveries()
fullsize = self.mc.get_relative_pos()
#
- rawstart = self.materialize_loop(original_loop_token)
+ rawstart = self.materialize_loop()
debug_start("jit-backend-addr")
debug_print("bridge out of Guard %d has address %x to %x" %
(descr_number, rawstart, rawstart + codeendpos))
@@ -692,11 +687,10 @@
clt.asmmemmgr_blocks = []
return clt.asmmemmgr_blocks
- def materialize_loop(self, looptoken):
+ def materialize_loop(self):
self.datablockwrapper.done() # finish using cpu.asmmemmgr
self.datablockwrapper = None
- allblocks = self.get_asmmemmgr_blocks(looptoken)
- return self.mc.materialize(self.cpu.asmmemmgr, allblocks,
+ return self.mc.materialize(self.cpu.asmmemmgr, self.allblocks,
self.cpu.gc_ll_descr.gcrootmap)
def _register_counter(self, tp, number, token):
@@ -1900,12 +1894,9 @@
assert mc.get_relative_pos() == start + 13
# write tight data that describes the failure recovery
self.write_failure_recovery_description(mc, guardtok.failargs,
- guardtok.fail_locs)
- # write the fail_index too
- mc.writeimm32(fail_index)
- # for testing the decoding, write a final byte 0xCC
+ guardtok.fail_locs,
+ fail_index)
if not we_are_translated():
- mc.writechar('\xCC')
faillocs = [loc for loc in guardtok.fail_locs if loc is not None]
guardtok.faildescr._x86_debug_faillocs = faillocs
return startpos
@@ -1919,7 +1910,8 @@
CODE_HOLE = 4 | DESCR_SPECIAL
CODE_INPUTARG = 8 | DESCR_SPECIAL
- def write_failure_recovery_description(self, mc, failargs, locs):
+ def write_failure_recovery_description(self, mc, failargs, locs,
+ fail_index):
for i in range(len(failargs)):
arg = failargs[i]
if arg is not None:
@@ -1950,7 +1942,12 @@
mc.writechar(chr(n))
mc.writechar(chr(self.CODE_STOP))
# assert that the fail_boxes lists are big enough
- assert len(failargs) <= self.fail_boxes_int.SIZE
+ assert len(failargs) <= stmtlocal.FAILARGS_LIMIT
+ # write the fail_index too
+ mc.writeimm32(fail_index)
+ # for testing the decoding, write a final byte 0xCC
+ if not we_are_translated():
+ mc.writechar('\xCC')
def rebuild_faillocs_from_descr(self, bytecode):
from pypy.jit.backend.x86.regalloc import X86FrameManager
@@ -2001,7 +1998,8 @@
@rgc.no_collect
def grab_frame_values(self, bytecode, frame_addr, allregisters):
# no malloc allowed here!!
- self.fail_ebp = allregisters[16 + ebp.value]
+ asmtlocal = stmtlocal.get_asm_tlocal(self.cpu)
+ asmtlocal.fail_ebp = allregisters[16 + ebp.value]
code_inputarg = False
num = 0
value_hi = 0
@@ -2055,11 +2053,11 @@
# store the loaded value into fail_boxes_<type>
if kind == self.DESCR_INT:
- tgt = self.fail_boxes_int.get_addr_for_num(num)
+ tgt = stmtlocal.fail_boxes_int_addr(asmtlocal, num)
elif kind == self.DESCR_REF:
- tgt = self.fail_boxes_ptr.get_addr_for_num(num)
+ tgt = stmtlocal.fail_boxes_ptr_addr(asmtlocal, num)
elif kind == self.DESCR_FLOAT:
- tgt = self.fail_boxes_float.get_addr_for_num(num)
+ tgt = stmtlocal.fail_boxes_float_addr(asmtlocal, num)
if WORD == 4:
rffi.cast(rffi.LONGP, tgt)[1] = value_hi
else:
@@ -2069,7 +2067,7 @@
#
if not we_are_translated():
assert bytecode[4] == 0xCC
- self.fail_boxes_count = num
+ asmtlocal.fail_boxes_count = num
fail_index = rffi.cast(rffi.INTP, bytecode)[0]
fail_index = rffi.cast(lltype.Signed, fail_index)
return fail_index
@@ -2152,7 +2150,45 @@
self.failure_recovery_code[exc + 2 * withfloats] = rawstart
self.mc = None
- def generate_failure(self, fail_index, locs, exc, locs_are_ref):
+ def generate_failure(self, fail_index, locs, boxes):
+ mc2 = codebuf.MachineCodeBlockWrapper()
+ self.write_failure_recovery_description(mc2, boxes, locs, fail_index)
+ bytecode = mc2.materialize(self.cpu.asmmemmgr, self.allblocks)
+ #
+ failure_recovery_func = llhelper(self._FAILURE_RECOVERY_FUNC,
+ self.failure_recovery_func)
+ failure_recovery_func = rffi.cast(lltype.Signed,
+ failure_recovery_func)
+ mc = self.mc
+ # Push the address of the recovery bytecode
+ mc.PUSH(imm(bytecode))
+ # Reserve space for all general purpose registers
+ mc.ADD_ri(esp.value, -self.cpu.NUM_REGS * WORD)
+ # Save the surviving registers in there
+ for loc in locs:
+ if isinstance(loc, RegLoc):
+ assert not loc.is_xmm, "XXX returning an xmm reg: fixme"
+ mc.MOV_sr(loc.value * WORD, loc.value)
+ # ebx/rbx is callee-save in both i386 and x86-64
+ mc.MOV_rr(ebx.value, esp.value)
+
+ addr = self.cpu.get_on_leave_jitted_int(save_exception=False)
+ self.mc.CALL(imm(addr))
+
+ if IS_X86_32:
+ mc.PUSH_r(ebx.value)
+ elif IS_X86_64:
+ mc.MOV_rr(edi.value, ebx.value)
+ else:
+ raise AssertionError("Shouldn't happen")
+ mc.CALL(imm(failure_recovery_func))
+ # returns in eax the fail_index
+ self._call_footer()
+ return
+
+ # ---------- below, the original code, more efficient but not
+ # ---------- ready to handle stm thread-locals
+ xxxxxxxx
self.mc.begin_reuse_scratch_register()
for i in range(len(locs)):
loc = locs[i]
diff --git a/pypy/jit/backend/x86/regalloc.py b/pypy/jit/backend/x86/regalloc.py
--- a/pypy/jit/backend/x86/regalloc.py
+++ b/pypy/jit/backend/x86/regalloc.py
@@ -486,14 +486,13 @@
consider_guard_isnull = _consider_guard
def consider_finish(self, op):
- locs = [self.loc(op.getarg(i)) for i in range(op.numargs())]
- locs_are_ref = [op.getarg(i).type == REF for i in range(op.numargs())]
+ boxes = [op.getarg(i) for i in range(op.numargs())]
+ locs = [self.loc(box) for box in boxes]
fail_index = self.assembler.cpu.get_fail_descr_number(op.getdescr())
# note: no exception should currently be set in llop.get_exception_addr
# even if this finish may be an exit_frame_with_exception (in this case
# the exception instance is in locs[0]).
- self.assembler.generate_failure(fail_index, locs, False,
- locs_are_ref)
+ self.assembler.generate_failure(fail_index, locs, boxes)
self.possibly_free_vars_for_op(op)
def consider_guard_no_exception(self, op):
diff --git a/pypy/jit/backend/x86/runner.py b/pypy/jit/backend/x86/runner.py
--- a/pypy/jit/backend/x86/runner.py
+++ b/pypy/jit/backend/x86/runner.py
@@ -10,7 +10,7 @@
from pypy.jit.backend.x86.arch import FORCE_INDEX_OFS, IS_X86_32
from pypy.jit.backend.x86.profagent import ProfileAgent
from pypy.jit.backend.llsupport.llmodel import AbstractLLCPU
-from pypy.jit.backend.x86 import regloc
+from pypy.jit.backend.x86 import regloc, stmtlocal
import sys
from pypy.tool.ansi_print import ansi_log
@@ -50,11 +50,8 @@
def setup(self):
if self.opts is not None:
- failargs_limit = self.opts.failargs_limit
- else:
- failargs_limit = 1000
- self.assembler = Assembler386(self, self.translate_support_code,
- failargs_limit)
+ assert self.opts.failargs_limit == stmtlocal.FAILARGS_LIMIT
+ self.assembler = Assembler386(self, self.translate_support_code)
def get_on_leave_jitted_hook(self):
return self.assembler.leave_jitted_hook
@@ -96,31 +93,32 @@
original_loop_token, log=log)
def get_latest_value_int(self, index):
- return self.assembler.fail_boxes_int.getitem(index)
+ return stmtlocal.get_asm_tlocal(self).fail_boxes_int[index]
def get_latest_value_float(self, index):
- return self.assembler.fail_boxes_float.getitem(index)
+ return stmtlocal.get_asm_tlocal(self).fail_boxes_float[index]
def get_latest_value_ref(self, index):
- return self.assembler.fail_boxes_ptr.getitem(index)
+ return stmtlocal.get_asm_tlocal(self).fail_boxes_ptr[index]
def get_latest_value_count(self):
- return self.assembler.fail_boxes_count
+ return stmtlocal.get_asm_tlocal(self).fail_boxes_count
def clear_latest_values(self, count):
- setitem = self.assembler.fail_boxes_ptr.setitem
+ asmtlocal = stmtlocal.get_asm_tlocal(self)
null = lltype.nullptr(llmemory.GCREF.TO)
for index in range(count):
- setitem(index, null)
+ asmtlocal.fail_boxes_ptr[index] = null
def get_latest_force_token(self):
# the FORCE_TOKEN operation and this helper both return 'ebp'.
- return self.assembler.fail_ebp
+ return stmtlocal.get_asm_tlocal(self).fail_ebp
def make_execute_token(self, *ARGS):
FUNCPTR = lltype.Ptr(lltype.FuncType(ARGS, lltype.Signed))
#
def execute_token(executable_token, *args):
+ stmtlocal.prepare_asm_tlocal(self)
clt = executable_token.compiled_loop_token
assert len(args) == clt._debug_nbargs
#
diff --git a/pypy/jit/backend/x86/stmtlocal.py
b/pypy/jit/backend/x86/stmtlocal.py
--- a/pypy/jit/backend/x86/stmtlocal.py
+++ b/pypy/jit/backend/x86/stmtlocal.py
@@ -2,9 +2,14 @@
# This is hopefully a temporary hack for x86 and x86-64
#
-from pypy.rpython.lltypesystem import lltype, rffi
+from pypy.rpython.lltypesystem import lltype, rffi, llmemory
+from pypy.rpython import annlowlevel
+from pypy.jit.codewriter import longlong
from pypy.translator.tool.cbuild import ExternalCompilationInfo
from pypy.jit.backend.x86.arch import WORD
+from pypy.rlib.objectmodel import we_are_translated
+from pypy.rlib import rgc
+from pypy.module.thread.ll_thread import get_ident
if WORD == 4:
@@ -34,3 +39,47 @@
mc.writechar('\x65') # %gs:
else:
mc.writechar('\x64') # %fs:
+
+# ____________________________________________________________
+
+
+FAILARGS_LIMIT = 1000 # xxx repeated constant
+
+ASSEMBLER_THREAD_LOCAL = lltype.GcStruct(
+ 'ASSEMBLER_THREAD_LOCAL',
+ ('fail_ebp', lltype.Signed),
+ ('fail_boxes_count', lltype.Signed),
+ ('fail_boxes_ptr', lltype.FixedSizeArray(llmemory.GCREF, FAILARGS_LIMIT)),
+ ('fail_boxes_int', lltype.FixedSizeArray(lltype.Signed, FAILARGS_LIMIT)),
+ ('fail_boxes_float', lltype.FixedSizeArray(longlong.FLOATSTORAGE,
+ FAILARGS_LIMIT)),
+ )
+
[email protected]_collect
+def get_asm_tlocal(cpu):
+ id = get_ident()
+ return cpu.assembler.asmtlocals[id]
+
+def prepare_asm_tlocal(cpu):
+ id = get_ident()
+ if id not in cpu.assembler.asmtlocals:
+ cpu.assembler.asmtlocals[id] = lltype.malloc(ASSEMBLER_THREAD_LOCAL)
+
+def fail_boxes_int_addr(tlocal, num):
+ tgt = llmemory.cast_ptr_to_adr(tlocal)
+ tgt += rffi.offsetof(ASSEMBLER_THREAD_LOCAL, 'fail_boxes_int')
+ tgt += num * rffi.sizeof(lltype.Signed)
+ return rffi.cast(lltype.Signed, tgt)
+
+def fail_boxes_ptr_addr(tlocal, num):
+ tgt = llmemory.cast_ptr_to_adr(tlocal)
+ tgt += rffi.offsetof(ASSEMBLER_THREAD_LOCAL, 'fail_boxes_ptr')
+ tgt = rffi.cast(lltype.Signed, tgt)
+ tgt += num * rffi.sizeof(llmemory.GCREF)
+ return tgt
+
+def fail_boxes_float_addr(tlocal, num):
+ tgt = llmemory.cast_ptr_to_adr(tlocal)
+ tgt += rffi.offsetof(ASSEMBLER_THREAD_LOCAL, 'fail_boxes_float')
+ tgt += num * rffi.sizeof(longlong.FLOATSTORAGE)
+ return rffi.cast(lltype.Signed, tgt)
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit