Author: Maciej Fijalkowski <[email protected]>
Branch: jitframe-on-heap
Changeset: r60685:fad2cb706322
Date: 2013-01-29 16:20 +0200
http://bitbucket.org/pypy/pypy/changeset/fad2cb706322/

Log:    merge

diff --git a/rpython/jit/backend/arm/assembler.py 
b/rpython/jit/backend/arm/assembler.py
--- a/rpython/jit/backend/arm/assembler.py
+++ b/rpython/jit/backend/arm/assembler.py
@@ -8,7 +8,7 @@
                                     N_REGISTERS_SAVED_BY_MALLOC, \
                                     JITFRAME_FIXED_SIZE, FRAME_FIXED_SIZE
 from rpython.jit.backend.arm.codebuilder import ARMv7Builder, 
OverwritingBuilder
-from rpython.jit.backend.arm.locations import get_fp_offset
+from rpython.jit.backend.arm.locations import get_fp_offset, imm
 from rpython.jit.backend.arm.regalloc import (Regalloc, ARMFrameManager,
                     CoreRegisterManager, check_imm_arg,
                     operations as regalloc_operations,
@@ -118,6 +118,9 @@
         self.gcmap_for_finish = lltype.malloc(jitframe.GCMAP, 1, zero=True)
         self.gcmap_for_finish[0] = r_uint(1)
 
+    def setup_failure_recovery(self):
+        self.failure_recovery_code = [0, 0, 0, 0]
+
     def finish_once(self):
         if self._debug:
             debug_start('jit-backend-counts')
@@ -334,199 +337,6 @@
         rawstart = mc.materialize(self.cpu.asmmemmgr, [])
         self.wb_slowpath[withcards + 2 * withfloats] = rawstart
 
-    def setup_failure_recovery(self):
-
-        #@rgc.no_collect -- XXX still true, but hacked gc_set_extra_threshold
-        def failure_recovery_func(mem_loc, frame_pointer, stack_pointer):
-            """mem_loc is a structure in memory describing where the values for
-            the failargs are stored.  frame loc is the address of the frame
-            pointer for the frame to be decoded frame """
-            vfp_registers = rffi.cast(rffi.LONGP, stack_pointer)
-            registers = rffi.ptradd(vfp_registers, 2*len(r.all_vfp_regs))
-            registers = rffi.cast(rffi.LONGP, registers)
-            bytecode = rffi.cast(rffi.UCHARP, mem_loc)
-            return self.grab_frame_values(self.cpu, bytecode, frame_pointer,
-                                                    registers, vfp_registers)
-        self.failure_recovery_code = [0, 0, 0, 0]
-
-        self.failure_recovery_func = failure_recovery_func
-
-    _FAILURE_RECOVERY_FUNC = lltype.Ptr(lltype.FuncType([rffi.LONGP] * 3,
-                                                        llmemory.GCREF))
-
-    @staticmethod
-    #@rgc.no_collect -- XXX still true, but hacked gc_set_extra_threshold
-    def grab_frame_values(cpu, bytecode, frame_pointer,
-                                                registers, vfp_registers):
-        # no malloc allowed here!!  xxx apart from one, hacking a lot
-        force_index = rffi.cast(lltype.Signed, frame_pointer)
-        num = 0
-        deadframe = lltype.nullptr(jitframe.DEADFRAME)
-        # step 1: lots of mess just to count the final value of 'num'
-        bytecode1 = bytecode
-        while 1:
-            code = rffi.cast(lltype.Signed, bytecode1[0])
-            bytecode1 = rffi.ptradd(bytecode1, 1)
-            if code >= AssemblerARM.CODE_FROMSTACK:
-                while code > 0x7F:
-                    code = rffi.cast(lltype.Signed, bytecode1[0])
-                    bytecode1 = rffi.ptradd(bytecode1, 1)
-            else:
-                kind = code & 3
-                if kind == AssemblerARM.DESCR_SPECIAL:
-                    if code == AssemblerARM.CODE_HOLE:
-                        num += 1
-                        continue
-                    if code == AssemblerARM.CODE_INPUTARG:
-                        continue
-                    if code == AssemblerARM.CODE_FORCED:
-                        # resuming from a GUARD_NOT_FORCED
-                        token = force_index
-                        deadframe = (
-                            cpu.assembler.force_token_to_dead_frame.pop(token))
-                        deadframe = lltype.cast_opaque_ptr(
-                            jitframe.DEADFRAMEPTR, deadframe)
-                        continue
-                    assert code == AssemblerARM.CODE_STOP
-                    break
-            num += 1
-
-        # allocate the deadframe
-        if not deadframe:
-            # Remove the "reserve" at the end of the nursery.  This means
-            # that it is guaranteed that the following malloc() works
-            # without requiring a collect(), but it needs to be re-added
-            # as soon as possible.
-            cpu.gc_clear_extra_threshold()
-            assert num <= cpu.get_failargs_limit()
-            try:
-                deadframe = lltype.malloc(jitframe.DEADFRAME, num)
-            except MemoryError:
-                fatalerror("memory usage error in grab_frame_values")
-        # fill it
-        code_inputarg = False
-        num = 0
-        value_hi = 0
-        while 1:
-            # decode the next instruction from the bytecode
-            code = rffi.cast(lltype.Signed, bytecode[0])
-            bytecode = rffi.ptradd(bytecode, 1)
-            if code >= AssemblerARM.CODE_FROMSTACK:
-                if code > 0x7F:
-                    shift = 7
-                    code &= 0x7F
-                    while True:
-                        nextcode = rffi.cast(lltype.Signed, bytecode[0])
-                        bytecode = rffi.ptradd(bytecode, 1)
-                        code |= (nextcode & 0x7F) << shift
-                        shift += 7
-                        if nextcode <= 0x7F:
-                            break
-                # load the value from the stack
-                kind = code & 3
-                code = (code - AssemblerARM.CODE_FROMSTACK) >> 2
-                if code_inputarg:
-                    code = ~code
-                    code_inputarg = False
-                stackloc = force_index - get_fp_offset(int(code))
-                value = rffi.cast(rffi.LONGP, stackloc)[0]
-                if kind == AssemblerARM.DESCR_FLOAT:
-                    assert WORD == 4
-                    value_hi = value
-                    value = rffi.cast(rffi.LONGP, stackloc - WORD)[0]
-            else:
-                kind = code & 3
-                if kind == AssemblerARM.DESCR_SPECIAL:
-                    if code == AssemblerARM.CODE_HOLE:
-                        num += 1
-                        continue
-                    if code == AssemblerARM.CODE_INPUTARG:
-                        code_inputarg = True
-                        continue
-                    if code == AssemblerARM.CODE_FORCED:
-                        continue
-                    assert code == AssemblerARM.CODE_STOP
-                    break
-                # 'code' identifies a register: load its value
-                code >>= 2
-                if kind == AssemblerARM.DESCR_FLOAT:
-                    if WORD == 4:
-                        value = vfp_registers[2*code]
-                        value_hi = vfp_registers[2*code + 1]
-                    else:
-                        value = registers[code]
-                else:
-                    value = registers[code]
-            # store the loaded value into fail_boxes_<type>
-            if kind == AssemblerARM.DESCR_INT:
-                deadframe.jf_values[num].int = value
-            elif kind == AssemblerARM.DESCR_REF:
-                deadframe.jf_values[num].ref = rffi.cast(llmemory.GCREF, value)
-            elif kind == AssemblerARM.DESCR_FLOAT:
-                assert WORD == 4
-                assert not longlong.is_64_bit
-                floatvalue = rffi.cast(lltype.SignedLongLong, value_hi)
-                floatvalue <<= 32
-                floatvalue |= rffi.cast(lltype.SignedLongLong,
-                                        rffi.cast(lltype.Unsigned, value))
-                deadframe.jf_values[num].float = floatvalue
-            else:
-                assert 0, "bogus kind"
-            num += 1
-        #
-        assert num == len(deadframe.jf_values)
-        if not we_are_translated():
-            assert bytecode[4] == 0xCC
-        fail_index = rffi.cast(rffi.INTP, bytecode)[0]
-        fail_descr = cpu.get_fail_descr_from_number(fail_index)
-        deadframe.jf_descr = fail_descr.hide(cpu)
-        return lltype.cast_opaque_ptr(llmemory.GCREF, deadframe)
-
-    def decode_inputargs(self, code):
-        descr_to_box_type = [REF, INT, FLOAT]
-        bytecode = rffi.cast(rffi.UCHARP, code)
-        arglocs = []
-        code_inputarg = False
-        while 1:
-            # decode the next instruction from the bytecode
-            code = rffi.cast(lltype.Signed, bytecode[0])
-            bytecode = rffi.ptradd(bytecode, 1)
-            if code >= self.CODE_FROMSTACK:
-                # 'code' identifies a stack location
-                if code > 0x7F:
-                    shift = 7
-                    code &= 0x7F
-                    while True:
-                        nextcode = rffi.cast(lltype.Signed, bytecode[0])
-                        bytecode = rffi.ptradd(bytecode, 1)
-                        code |= (nextcode & 0x7F) << shift
-                        shift += 7
-                        if nextcode <= 0x7F:
-                            break
-                kind = code & 3
-                code = (code - self.CODE_FROMSTACK) >> 2
-                if code_inputarg:
-                    code = ~code
-                    code_inputarg = False
-                loc = ARMFrameManager.frame_pos(code, descr_to_box_type[kind])
-            elif code == self.CODE_STOP:
-                break
-            elif code == self.CODE_HOLE:
-                continue
-            elif code == self.CODE_INPUTARG:
-                code_inputarg = True
-                continue
-            else:
-                # 'code' identifies a register
-                kind = code & 3
-                code >>= 2
-                if kind == self.DESCR_FLOAT:
-                    loc = r.all_vfp_regs[code]
-                else:
-                    loc = r.all_regs[code]
-            arglocs.append(loc)
-        return arglocs[:]
-
     def _build_malloc_slowpath(self):
         mc = ARMv7Builder()
         if self.cpu.supports_floats:
@@ -562,48 +372,63 @@
         # see ../x86/assembler.py:propagate_memoryerror_if_eax_is_null
         self.mc.CMP_ri(r.r0.value, 0)
         self.mc.B(self.propagate_exception_path, c=c.EQ)
+    
+    def _push_all_regs_to_jitframe(self, mc, ignored_regs, withfloats,
+                                callee_only=False):
+        if callee_only:
+            regs = CoreRegisterManager.save_around_call_regs
+        else:
+            regs = CoreRegisterManager.all_regs
+        # use STMDB ops here
+        for i, gpr in enumerate(regs):
+            if gpr in ignored_regs:
+                continue
+            mc.STR_ri(gpr.value, r.fp.value, i * WORD)
+        if withfloats:
+            assert 0, 'implement me'
 
     def _build_failure_recovery(self, exc, withfloats=False):
         mc = ARMv7Builder()
-        failure_recovery = llhelper(self._FAILURE_RECOVERY_FUNC,
-                                            self.failure_recovery_func)
+        self._push_all_regs_to_jitframe(mc, [], withfloats)
         self._insert_checks(mc)
-        if withfloats:
-            f = r.all_vfp_regs
-        else:
-            f = []
-        with saved_registers(mc, r.all_regs, f):
-            if exc:
-                # We might have an exception pending.  Load it into r4
-                # (this is a register saved across calls)
-                mc.gen_load_int(r.r5.value, self.cpu.pos_exc_value())
-                mc.LDR_ri(r.r4.value, r.r5.value)
-                # clear the exc flags
-                mc.gen_load_int(r.r6.value, 0)
-                mc.STR_ri(r.r6.value, r.r5.value)
-                mc.gen_load_int(r.r5.value, self.cpu.pos_exception())
-                mc.STR_ri(r.r6.value, r.r5.value)
-            # move mem block address, to r0 to pass as
-            mc.MOV_rr(r.r0.value, r.lr.value)
-            # pass the current frame pointer as second param
-            mc.MOV_rr(r.r1.value, r.fp.value)
-            # pass the current stack pointer as third param
-            mc.MOV_rr(r.r2.value, r.sp.value)
-            self._insert_checks(mc)
-            mc.BL(rffi.cast(lltype.Signed, failure_recovery))
-            if exc:
-                # save ebx into 'jf_guard_exc'
-                from rpython.jit.backend.llsupport.descr import 
unpack_fielddescr
-                descrs = self.cpu.gc_ll_descr.getframedescrs(self.cpu)
-                offset, size, _ = unpack_fielddescr(descrs.jf_guard_exc)
-                mc.STR_rr(r.r4.value, r.r0.value, offset, cond=c.AL)
-            mc.MOV_rr(r.ip.value, r.r0.value)
-        mc.MOV_rr(r.r0.value, r.ip.value)
-        self.gen_func_epilog(mc=mc)
-        rawstart = mc.materialize(self.cpu.asmmemmgr, [],
-                                   self.cpu.gc_ll_descr.gcrootmap)
+
+        if exc:
+            # We might have an exception pending.  Load it into r4
+            # (this is a register saved across calls)
+            mc.gen_load_int(r.r5.value, self.cpu.pos_exc_value())
+            mc.LDR_ri(r.r4.value, r.r5.value)
+            # clear the exc flags
+            mc.gen_load_int(r.r6.value, 0)
+            mc.STR_ri(r.r6.value, r.r5.value) # pos_exc_value is still in r5
+            mc.gen_load_int(r.r5.value, self.cpu.pos_exception())
+            mc.STR_ri(r.r6.value, r.r5.value)
+            # save r4 into 'jf_guard_exc'
+            offset = self.cpu.get_ofs_of_frame_field('jf_guard_exc')
+            assert check_imm_arg(abs(offset))
+            mc.STR_ri(r.r4.value, r.fp.value, imm=offset)
+        # now we return from the complete frame, which starts from
+        # _call_header_with_stack_check().  The LEA in _call_footer below
+        # throws away most of the frame, including all the PUSHes that we
+        # did just above.
+        ofs = self.cpu.get_ofs_of_frame_field('jf_descr')
+        assert check_imm_arg(abs(ofs))
+        ofs2 = self.cpu.get_ofs_of_frame_field('jf_gcmap')
+        assert check_imm_arg(abs(ofs2))
+        base_ofs = self.cpu.get_baseofs_of_frame_field()
+        # store the gcmap
+        mc.POP([r.ip.value])
+        mc.STR_ri(r.ip.value, r.fp.value, imm=ofs2)
+        # store the descr
+        mc.POP([r.ip.value])
+        mc.STR_ri(r.ip.value, r.fp.value, imm=ofs)
+
+        # set return value
+        assert check_imm_arg(base_ofs)
+        mc.SUB_ri(r.r0.value, r.fp.value, base_ofs)
+
+        self.gen_func_epilog(mc)
+        rawstart = mc.materialize(self.cpu.asmmemmgr, [])
         self.failure_recovery_code[exc + 2 * withfloats] = rawstart
-        self.mc = None
 
     DESCR_REF       = 0x00
     DESCR_INT       = 0x01
@@ -649,30 +474,37 @@
 
 
     def generate_quick_failure(self, guardtok, fcond=c.AL):
-        assert isinstance(guardtok.save_exc, bool)
-        fail_index = self.cpu.get_fail_descr_number(guardtok.descr)
+        assert isinstance(guardtok.exc, bool)
         startpos = self.mc.currpos()
         withfloats = False
         for box in guardtok.failargs:
             if box is not None and box.type == FLOAT:
                 withfloats = True
                 break
-        exc = guardtok.save_exc
+        exc = guardtok.exc
         target = self.failure_recovery_code[exc + 2 * withfloats]
-        assert target != 0
+        fail_descr = cast_instance_to_gcref(guardtok.faildescr)
+        fail_descr = rffi.cast(lltype.Signed, fail_descr)
+        positions = [0] * len(guardtok.fail_locs)
+        for i, loc in enumerate(guardtok.fail_locs):
+            if loc is None:
+                positions[i] = -1
+            elif loc.is_stack():
+                positions[i] = loc.value
+            else:
+                if loc.is_reg():
+                    assert loc is not r.fp # for now
+                    v = loc.value
+                else:
+                    assert 0, 'fix for floats'
+                    assert loc.is_vfp_reg()
+                    #v = len(VFPRegisterManager.all_regs) + loc.value
+                positions[i] = v * WORD
+        # write down the positions of locs
+        guardtok.faildescr.rd_locs = positions
+        self.regalloc_push(imm(fail_descr))
+        self.push_gcmap(self.mc, gcmap=guardtok.gcmap, push=True)
         self.mc.BL(target)
-        # write tight data that describes the failure recovery
-        if guardtok.is_guard_not_forced:
-            self.mc.writechar(chr(self.CODE_FORCED))
-        self.write_failure_recovery_description(guardtok.descr,
-                                guardtok.failargs, guardtok.faillocs[1:])
-        self.mc.write32(fail_index)
-        # for testing the decoding, write a final byte 0xCC
-        if not we_are_translated():
-            self.mc.writechar('\xCC')
-            faillocs = [loc for loc in guardtok.faillocs if loc is not None]
-            guardtok.descr._arm_debug_faillocs = faillocs
-        self.align()
         return startpos
 
     def align(self):
@@ -929,13 +761,11 @@
         for tok in self.pending_guards:
             #generate the exit stub and the encoded representation
             tok.pos_recovery_stub = self.generate_quick_failure(tok)
-            # store info on the descr
-            tok.descr._arm_current_frame_depth = tok.faillocs[0].getint()
 
     def process_pending_guards(self, block_start):
         clt = self.current_clt
         for tok in self.pending_guards:
-            descr = tok.descr
+            descr = tok.faildescr
             assert isinstance(descr, AbstractFailDescr)
             failure_recovery_pos = block_start + tok.pos_recovery_stub
             descr._arm_failure_recovery_block = failure_recovery_pos
@@ -1395,16 +1225,18 @@
         # keep the ref alive
         self.current_clt.allgcrefs.append(gcmapref)
         rgc._make_sure_does_not_move(gcmapref)
-        pass
-        #if push:
-        #    mc.PUSH(imm(rffi.cast(lltype.Signed, gcmapref)))
-        #elif mov:
-        #    mc.MOV(RawEspLoc(0, REF),
-        #           imm(rffi.cast(lltype.Signed, gcmapref)))
-        #else:
-        #    assert store
-        #    ofs = self.cpu.get_ofs_of_frame_field('jf_gcmap')
-        #    mc.MOV(raw_stack(ofs), imm(rffi.cast(lltype.Signed, gcmapref)))
+        if push:
+            mc.gen_load_int(r.ip.value, rffi.cast(lltype.Signed, gcmapref))
+            mc.PUSH([r.ip.value])
+        elif mov:
+            assert 0
+            mc.MOV(RawEspLoc(0, REF),
+                   imm(rffi.cast(lltype.Signed, gcmapref)))
+        else:
+            assert store
+            ofs = self.cpu.get_ofs_of_frame_field('jf_gcmap')
+            mc.gen_load_int(r.ip.value, rffi.cast(lltype.Signed, gcmapref))
+            mc.STR_ri(r.ip.value, r.fp.value, imm=ofs)
 
     def pop_gcmap(self, mc):
         ofs = self.cpu.get_ofs_of_frame_field('jf_gcmap')
diff --git a/rpython/jit/backend/arm/codebuilder.py 
b/rpython/jit/backend/arm/codebuilder.py
--- a/rpython/jit/backend/arm/codebuilder.py
+++ b/rpython/jit/backend/arm/codebuilder.py
@@ -1,7 +1,7 @@
 from rpython.jit.backend.arm import conditions as cond
 from rpython.jit.backend.arm import registers as reg
 from rpython.jit.backend.arm import support
-from rpython.jit.backend.arm.arch import (WORD, FUNC_ALIGN)
+from rpython.jit.backend.arm.arch import (WORD, FUNC_ALIGN, PC_OFFSET)
 from rpython.jit.backend.arm.instruction_builder import define_instructions
 from rpython.jit.backend.llsupport.asmmemmgr import BlockBuilderMixin
 from rpython.rlib.objectmodel import we_are_translated
@@ -186,7 +186,7 @@
 
     def B_offs(self, target_ofs, c=cond.AL):
         pos = self.currpos()
-        target_ofs = target_ofs - (pos + arch.PC_OFFSET)
+        target_ofs = target_ofs - (pos + PC_OFFSET)
         assert target_ofs & 0x3 == 0
         self.write32(c << 28 | 0xA << 24 | (target_ofs >> 2) & 0xFFFFFF)
 
diff --git a/rpython/jit/backend/arm/opassembler.py 
b/rpython/jit/backend/arm/opassembler.py
--- a/rpython/jit/backend/arm/opassembler.py
+++ b/rpython/jit/backend/arm/opassembler.py
@@ -2,7 +2,7 @@
 from rpython.jit.backend.arm import conditions as c
 from rpython.jit.backend.arm import registers as r
 from rpython.jit.backend.arm import shift
-from rpython.jit.backend.arm.arch import WORD, DOUBLE_WORD
+from rpython.jit.backend.arm.arch import WORD, DOUBLE_WORD, JITFRAME_FIXED_SIZE
 
 from rpython.jit.backend.arm.helper.assembler import 
(gen_emit_op_by_helper_call,
                                                 gen_emit_op_unary_cmp,
@@ -21,7 +21,7 @@
 from rpython.jit.backend.arm.jump import remap_frame_layout
 from rpython.jit.backend.arm.regalloc import TempInt, TempPtr
 from rpython.jit.backend.arm.locations import imm
-from rpython.jit.backend.llsupport import symbolic
+from rpython.jit.backend.llsupport import symbolic, jitframe
 from rpython.jit.backend.llsupport.descr import InteriorFieldDescr
 from rpython.jit.metainterp.history import (Box, AbstractFailDescr,
                                             INT, FLOAT, REF)
@@ -35,18 +35,43 @@
 
 
 class GuardToken(object):
-    def __init__(self, descr, failargs, faillocs, offset, save_exc, fcond=c.AL,
+    def __init__(self, faildescr, failargs, fail_locs, offset, exc, fcond=c.AL,
                  is_guard_not_invalidated=False, is_guard_not_forced=False):
-        assert isinstance(save_exc, bool)
-        self.descr = descr
+        assert isinstance(exc, bool)
+        self.faildescr = faildescr
+        self.failargs = failargs
+        self.fail_locs = fail_locs[1:]
         self.offset = offset
+        self.gcmap = self.compute_gcmap(failargs, fail_locs, 
fail_locs[0].value)
+        self.exc = exc
         self.is_guard_not_invalidated = is_guard_not_invalidated
         self.is_guard_not_forced = is_guard_not_forced
-        self.failargs = failargs
-        self.faillocs = faillocs
-        self.save_exc = save_exc
         self.fcond = fcond
 
+    def compute_gcmap(self, failargs, fail_locs, frame_depth):
+        # note that regalloc has a very similar compute, but
+        # one that does iteration over all bindings, so slightly different,
+        # eh
+        size = frame_depth + JITFRAME_FIXED_SIZE
+        gcmap = lltype.malloc(jitframe.GCMAP, size // WORD // 8 + 1,
+                              zero=True)
+        input_i = 0
+        for i in range(len(failargs)):
+            arg = failargs[i]
+            if arg is None:
+                continue
+            loc = fail_locs[input_i]
+            input_i += 1
+            if arg.type == REF:
+                loc = fail_locs[i]
+                if loc.is_reg():
+                    val = loc.value
+                else:
+                    assert 0, 'ffuu, implement'
+                    val = loc.value // WORD
+                gcmap[val // WORD // 8] |= r_uint(1) << (val % (WORD * 8))
+        return gcmap
+
 
 class ResOpAssembler(object):
 
@@ -211,9 +236,9 @@
             self.mc.BKPT()
         self.pending_guards.append(GuardToken(descr,
                                     failargs=op.getfailargs(),
-                                    faillocs=arglocs,
+                                    fail_locs=arglocs,
                                     offset=pos,
-                                    save_exc=save_exc,
+                                    exc=save_exc,
                                     
is_guard_not_invalidated=is_guard_not_invalidated,
                                     is_guard_not_forced=is_guard_not_forced,
                                     fcond=fcond))
diff --git a/rpython/jit/backend/arm/regalloc.py 
b/rpython/jit/backend/arm/regalloc.py
--- a/rpython/jit/backend/arm/regalloc.py
+++ b/rpython/jit/backend/arm/regalloc.py
@@ -709,6 +709,7 @@
         # optimization only: fill in the 'hint_frame_locations' dictionary
         # of rm and xrm based on the JUMP at the end of the loop, by looking
         # at where we would like the boxes to be after the jump.
+        return # XXX disabled for now
         op = operations[-1]
         if op.getopnum() != rop.JUMP:
             return
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to