[pypy-commit] pypy jitframe-on-heap: kill kill kill - enough progress to pass call_loop

fijal Sat, 12 Jan 2013 05:43:41 -0800

Author: Maciej Fijalkowski <[email protected]>
Branch: jitframe-on-heap
Changeset: r59986:3a2e21e6fe99
Date: 2013-01-12 15:41 +0200
http://bitbucket.org/pypy/pypy/changeset/3a2e21e6fe99/


Log:    kill kill kill - enough progress to pass call_loop

diff --git a/pypy/jit/backend/llsupport/llmodel.py 
b/pypy/jit/backend/llsupport/llmodel.py
--- a/pypy/jit/backend/llsupport/llmodel.py
+++ b/pypy/jit/backend/llsupport/llmodel.py
@@ -279,7 +279,7 @@
         if descr.final_descr:
             assert index == 0
             return 0
-        xxx
+        return descr.rd_locs[index]
 
     def get_int_value(self, deadframe, index):
         pos = self._decode_pos(deadframe, index)
diff --git a/pypy/jit/backend/x86/arch.py b/pypy/jit/backend/x86/arch.py
--- a/pypy/jit/backend/x86/arch.py
+++ b/pypy/jit/backend/x86/arch.py
@@ -48,14 +48,15 @@
     FORCE_INDEX_OFS = 0
     SAVED_REGISTERS = 1    # range(1, 5)
     MY_COPY_OF_REGS = 5    # range(5, 9)
-    JITFRAME_FIXED_SIZE = 1
+    XXX
 else:
     # rbp + rbx + r12 + r13 + r14 + r15 + 11 extra words + force_index = 18
     FRAME_FIXED_SIZE = 18 # 18 aligned to 16 bytes = 2 * WORD
     FORCE_INDEX_OFS = 0
     SAVED_REGISTERS = 1    # range(1, 7)
     MY_COPY_OF_REGS = 7    # range(7, 18)
-    JITFRAME_FIXED_SIZE = 1
+    JITFRAME_FIXED_SIZE = 32 # 1 for the number and 32 for all the registers,
+                             # but they're never used together
 
 # "My copy of regs" has room for almost all registers, apart from eax and edx
 # which are used in the malloc itself.  They are:
diff --git a/pypy/jit/backend/x86/assembler.py 
b/pypy/jit/backend/x86/assembler.py
--- a/pypy/jit/backend/x86/assembler.py
+++ b/pypy/jit/backend/x86/assembler.py
@@ -14,7 +14,8 @@
     gpr_reg_mgr_cls, xmm_reg_mgr_cls, _valid_addressing_size)
 
 from pypy.jit.backend.x86.arch import (FRAME_FIXED_SIZE, FORCE_INDEX_OFS, WORD,
-                                       IS_X86_32, IS_X86_64)
+                                       IS_X86_32, IS_X86_64,
+                                       JITFRAME_FIXED_SIZE)
 
 from pypy.jit.backend.x86.regloc import (eax, ecx, edx, ebx,
                                          esp, ebp, esi, edi,
@@ -698,6 +699,7 @@
         p = rffi.cast(rffi.INTP, adr_jump_offset)
         adr_target = adr_jump_offset + 4 + rffi.cast(lltype.Signed, p[0])
         # skip the CALL
+        XXX
         if WORD == 4:
             adr_target += 5     # CALL imm
         else:
@@ -1834,279 +1836,41 @@
         exc = guardtok.exc
         target = self.failure_recovery_code[exc + 2 * withfloats]
         if WORD == 4:
-            mc.CALL(imm(target))
+            mc.PUSH(imm(fail_index))
+            mc.JMP(imm(target))
         else:
-            # Generate exactly 13 bytes:
-            #        MOV r11, target-as-8-bytes
-            #        CALL *r11
-            # Keep the number 13 in sync with _find_failure_recovery_bytecode.
-            start = mc.get_relative_pos()
             mc.MOV_ri64(X86_64_SCRATCH_REG.value, target)
-            mc.CALL_r(X86_64_SCRATCH_REG.value)
-            assert mc.get_relative_pos() == start + 13
+            mc.PUSH(imm(fail_index))
+            mc.JMP_r(X86_64_SCRATCH_REG.value)
         # write tight data that describes the failure recovery
         if guardtok.is_guard_not_forced:
+            XXX
             mc.writechar(chr(self.CODE_FORCED))
-        self.write_failure_recovery_description(mc, guardtok.failargs,
-                                                guardtok.fail_locs)
-        # write the fail_index too
-        mc.writeimm32(fail_index)
+        positions = [0] * len(guardtok.fail_locs)
+        assert IS_X86_64
+        for i, loc in enumerate(guardtok.fail_locs):
+            if loc is None:
+                positions[i] = -1
+            elif isinstance(loc, StackLoc):
+                xxx
+            else:
+                assert isinstance(loc, RegLoc)
+                positions[i] = (loc.value + loc.is_xmm * 16) * WORD
+        guardtok.faildescr.rd_locs = positions
+        # write fail_index too
         # for testing the decoding, write a final byte 0xCC
-        if not we_are_translated():
-            mc.writechar('\xCC')
-            faillocs = [loc for loc in guardtok.fail_locs if loc is not None]
-            guardtok.faildescr._x86_debug_faillocs = faillocs
         return startpos
 
-    DESCR_REF       = 0x00
-    DESCR_INT       = 0x01
-    DESCR_FLOAT     = 0x02
-    DESCR_SPECIAL   = 0x03
-    CODE_FROMSTACK  = 4 * (8 + 8*IS_X86_64)
-    CODE_STOP       = 0 | DESCR_SPECIAL
-    CODE_HOLE       = 4 | DESCR_SPECIAL
-    CODE_INPUTARG   = 8 | DESCR_SPECIAL
-    CODE_FORCED     = 12 | DESCR_SPECIAL
-
-    def write_failure_recovery_description(self, mc, failargs, locs):
-        for i in range(len(failargs)):
-            arg = failargs[i]
-            if arg is not None:
-                if arg.type == REF:
-                    kind = self.DESCR_REF
-                elif arg.type == INT:
-                    kind = self.DESCR_INT
-                elif arg.type == FLOAT:
-                    kind = self.DESCR_FLOAT
-                else:
-                    raise AssertionError("bogus kind")
-                loc = locs[i]
-                if isinstance(loc, StackLoc):
-                    pos = loc.position
-                    if pos < 0:
-                        mc.writechar(chr(self.CODE_INPUTARG))
-                        pos = ~pos
-                    n = self.CODE_FROMSTACK//4 + pos
-                else:
-                    assert isinstance(loc, RegLoc)
-                    n = loc.value
-                n = kind + 4*n
-                while n > 0x7F:
-                    mc.writechar(chr((n & 0x7F) | 0x80))
-                    n >>= 7
-            else:
-                n = self.CODE_HOLE
-            mc.writechar(chr(n))
-        mc.writechar(chr(self.CODE_STOP))
-
-    def rebuild_faillocs_from_descr(self, bytecode):
-        from pypy.jit.backend.x86.regalloc import X86FrameManager
-        descr_to_box_type = [REF, INT, FLOAT]
-        bytecode = rffi.cast(rffi.UCHARP, bytecode)
-        arglocs = []
-        code_inputarg = False
-        while 1:
-            # decode the next instruction from the bytecode
-            code = rffi.cast(lltype.Signed, bytecode[0])
-            bytecode = rffi.ptradd(bytecode, 1)
-            if code >= self.CODE_FROMSTACK:
-                # 'code' identifies a stack location
-                if code > 0x7F:
-                    shift = 7
-                    code &= 0x7F
-                    while True:
-                        nextcode = rffi.cast(lltype.Signed, bytecode[0])
-                        bytecode = rffi.ptradd(bytecode, 1)
-                        code |= (nextcode & 0x7F) << shift
-                        shift += 7
-                        if nextcode <= 0x7F:
-                            break
-                kind = code & 3
-                code = (code - self.CODE_FROMSTACK) >> 2
-                if code_inputarg:
-                    code = ~code
-                    code_inputarg = False
-                loc = X86FrameManager.frame_pos(code, descr_to_box_type[kind])
-            elif code == self.CODE_STOP:
-                break
-            elif code == self.CODE_HOLE:
-                continue
-            elif code == self.CODE_INPUTARG:
-                code_inputarg = True
-                continue
-            else:
-                # 'code' identifies a register
-                kind = code & 3
-                code >>= 2
-                if kind == self.DESCR_FLOAT:
-                    loc = regloc.XMMREGLOCS[code]
-                else:
-                    loc = regloc.REGLOCS[code]
-            arglocs.append(loc)
-        return arglocs[:]
-
-    @staticmethod
-    @rgc.no_collect
-    def grab_frame_values(cpu, bytecode, frame_addr, allregisters):
-        # no malloc allowed here!!  xxx apart from one, hacking a lot
-        #self.fail_ebp = allregisters[16 + ebp.value]
-        num = 0
-        import pdb
-        pdb.set_trace()
-        XXX
-        deadframe = lltype.nullptr(jitframe.DEADFRAME)
-        # step 1: lots of mess just to count the final value of 'num'
-        bytecode1 = bytecode
-        while 1:
-            code = rffi.cast(lltype.Signed, bytecode1[0])
-            bytecode1 = rffi.ptradd(bytecode1, 1)
-            if code >= Assembler386.CODE_FROMSTACK:
-                while code > 0x7F:
-                    code = rffi.cast(lltype.Signed, bytecode1[0])
-                    bytecode1 = rffi.ptradd(bytecode1, 1)
-            else:
-                kind = code & 3
-                if kind == Assembler386.DESCR_SPECIAL:
-                    if code == Assembler386.CODE_HOLE:
-                        num += 1
-                        continue
-                    if code == Assembler386.CODE_INPUTARG:
-                        continue
-                    if code == Assembler386.CODE_FORCED:
-                        # resuming from a GUARD_NOT_FORCED
-                        token = allregisters[16 + ebp.value]
-                        deadframe = (
-                            cpu.assembler.force_token_to_dead_frame.pop(token))
-                        deadframe = lltype.cast_opaque_ptr(
-                            jitframe.DEADFRAMEPTR, deadframe)
-                        continue
-                    assert code == Assembler386.CODE_STOP
-                    break
-            num += 1
-        # allocate the deadframe
-        if not deadframe:
-            # Remove the "reserve" at the end of the nursery.  This means
-            # that it is guaranteed that the following malloc() works
-            # without requiring a collect(), but it needs to be re-added
-            # as soon as possible.
-            assert num <= cpu.get_failargs_limit()
-            try:
-                deadframe = lltype.malloc(jitframe.DEADFRAME, num)
-            except MemoryError:
-                fatalerror("memory usage error in grab_frame_values")
-        # fill it
-        code_inputarg = False
-        num = 0
-        value_hi = 0
-        while 1:
-            # decode the next instruction from the bytecode
-            code = rffi.cast(lltype.Signed, bytecode[0])
-            bytecode = rffi.ptradd(bytecode, 1)
-            if code >= Assembler386.CODE_FROMSTACK:
-                if code > 0x7F:
-                    shift = 7
-                    code &= 0x7F
-                    while True:
-                        nextcode = rffi.cast(lltype.Signed, bytecode[0])
-                        bytecode = rffi.ptradd(bytecode, 1)
-                        code |= (nextcode & 0x7F) << shift
-                        shift += 7
-                        if nextcode <= 0x7F:
-                            break
-                # load the value from the stack
-                kind = code & 3
-                code = (code - Assembler386.CODE_FROMSTACK) >> 2
-                if code_inputarg:
-                    code = ~code
-                    code_inputarg = False
-                stackloc = frame_addr + get_ebp_ofs(code)
-                value = rffi.cast(rffi.LONGP, stackloc)[0]
-                if kind == Assembler386.DESCR_FLOAT and WORD == 4:
-                    value_hi = value
-                    value = rffi.cast(rffi.LONGP, stackloc - 4)[0]
-            else:
-                kind = code & 3
-                if kind == Assembler386.DESCR_SPECIAL:
-                    if code == Assembler386.CODE_HOLE:
-                        num += 1
-                        continue
-                    if code == Assembler386.CODE_INPUTARG:
-                        code_inputarg = True
-                        continue
-                    if code == Assembler386.CODE_FORCED:
-                        continue
-                    assert code == Assembler386.CODE_STOP
-                    break
-                # 'code' identifies a register: load its value
-                code >>= 2
-                if kind == Assembler386.DESCR_FLOAT:
-                    if WORD == 4:
-                        value = allregisters[2*code]
-                        value_hi = allregisters[2*code + 1]
-                    else:
-                        value = allregisters[code]
-                else:
-                    value = allregisters[16 + code]
-
-            # store the loaded value into fail_boxes_<type>
-            if kind == Assembler386.DESCR_INT:
-                deadframe.jf_values[num].int = value
-            elif kind == Assembler386.DESCR_REF:
-                deadframe.jf_values[num].ref = rffi.cast(llmemory.GCREF, value)
-            elif kind == Assembler386.DESCR_FLOAT:
-                if WORD == 4:
-                    assert not longlong.is_64_bit
-                    floatvalue = rffi.cast(lltype.SignedLongLong, value_hi)
-                    floatvalue <<= 32
-                    floatvalue |= rffi.cast(lltype.SignedLongLong,
-                                            rffi.cast(lltype.Unsigned, value))
-                else:
-                    assert longlong.is_64_bit
-                    floatvalue = longlong2float.longlong2float(value)
-                deadframe.jf_values[num].float = floatvalue
-            else:
-                assert 0, "bogus kind"
-            num += 1
-        #
-        assert num == len(deadframe.jf_values)
-        if not we_are_translated():
-            assert bytecode[4] == 0xCC
-        #self.fail_boxes_count = num
-        fail_index = rffi.cast(rffi.INTP, bytecode)[0]
-        fail_descr = cpu.get_fail_descr_from_number(fail_index)
-        deadframe.jf_descr = fail_descr.hide(cpu)
-        return lltype.cast_opaque_ptr(llmemory.GCREF, deadframe)
-
     def setup_failure_recovery(self):
-        @rgc.no_collect
-        def failure_recovery_func(registers):
-            # 'registers' is a pointer to a structure containing the
-            # original value of the registers, optionally the original
-            # value of XMM registers, and finally a reference to the
-            # recovery bytecode.  See _build_failure_recovery() for details.
-            stack_at_ebp = registers[ebp.value]
-            bytecode = rffi.cast(rffi.UCHARP, registers[self.cpu.NUM_REGS])
-            allregisters = rffi.ptradd(registers, -16)
-            return self.grab_frame_values(self.cpu, bytecode, stack_at_ebp,
-                                          allregisters)
-
-        self.failure_recovery_func = failure_recovery_func
         self.failure_recovery_code = [0, 0, 0, 0]
 
-    _FAILURE_RECOVERY_FUNC = lltype.Ptr(lltype.FuncType([rffi.LONGP],
-                                                        llmemory.GCREF))
-
     def _build_failure_recovery(self, exc, withfloats=False):
-        failure_recovery_func = llhelper(self._FAILURE_RECOVERY_FUNC,
-                                         self.failure_recovery_func)
-        failure_recovery_func = rffi.cast(lltype.Signed,
-                                          failure_recovery_func)
         mc = codebuf.MachineCodeBlockWrapper()
         self.mc = mc
 
         # Push all general purpose registers
-        for gpr in range(self.cpu.NUM_REGS-1, -1, -1):
-            mc.PUSH_r(gpr)
+        for gpr in range(self.cpu.NUM_REGS):
+            mc.MOV_br(gpr * WORD, gpr)
 
         if exc:
             # We might have an exception pending.  Load it into ebx
@@ -2115,42 +1879,23 @@
             mc.MOV(heap(self.cpu.pos_exception()), imm0)
             mc.MOV(heap(self.cpu.pos_exc_value()), imm0)
 
-        # Load the current esp value into edi.  On 64-bit, this is the
-        # argument.  On 32-bit, it will be pushed as argument below.
-        mc.MOV_rr(edi.value, esp.value)
-
         if withfloats:
-            # Push all float registers
-            mc.SUB_ri(esp.value, self.cpu.NUM_REGS*8)
             for i in range(self.cpu.NUM_REGS):
-                mc.MOVSD_sx(8*i, i)
-
-        # the following call saves all values from the stack and from
-        # registers to a fresh new deadframe object.
-        # Note that the registers are saved so far in esi[0] to esi[7],
-        # as pushed above, plus optionally in esi[-16] to esi[-1] for
-        # the XMM registers.  Moreover, esi[8] is a pointer to the recovery
-        # bytecode, pushed just before by the CALL instruction written by
-        # generate_quick_failure().
-
-        if IS_X86_32:
-            mc.SUB_ri(esp.value, 3*WORD)    # for stack alignment
-            mc.PUSH_r(edi.value)
-
-        mc.CALL(imm(failure_recovery_func))
-        # returns in eax the deadframe object
+                mc.MOVSD_bx((16 + i) * WORD, i)
 
         if exc:
             # save ebx into 'jf_guard_exc'
             from pypy.jit.backend.llsupport.descr import unpack_fielddescr
             descrs = self.cpu.gc_ll_descr.getframedescrs(self.cpu)
             offset, size, _ = unpack_fielddescr(descrs.jf_guard_exc)
+            # XXX ebp relative, not eax relative
             mc.MOV_mr((eax.value, offset), ebx.value)
 
         # now we return from the complete frame, which starts from
         # _call_header_with_stack_check().  The LEA in _call_footer below
         # throws away most of the frame, including all the PUSHes that we
         # did just above.
+        mc.POP(eax)
 
         self._call_footer()
         rawstart = mc.materialize(self.cpu.asmmemmgr, [])
diff --git a/pypy/jit/backend/x86/runner.py b/pypy/jit/backend/x86/runner.py
--- a/pypy/jit/backend/x86/runner.py
+++ b/pypy/jit/backend/x86/runner.py
@@ -116,7 +116,7 @@
             func = rffi.cast(FUNCPTR, addr)
             #llop.debug_print(lltype.Void, ">>>> Entering", addr)
             frame = lltype.malloc(jitframe.JITFRAME, clt.frame_depth +
-                                  JITFRAME_FIXED_SIZE)
+                                  JITFRAME_FIXED_SIZE, zero=True)
             frame.jf_frame_info = clt.frame_info
             ll_frame = lltype.cast_opaque_ptr(llmemory.GCREF, frame)
             prev_interpreter = None   # help flow space
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy jitframe-on-heap: kill kill kill - enough progress to pass call_loop

Reply via email to