Author: David Schneider <[email protected]>
Branch: arm-backend-2
Changeset: r51548:d06bbcb1c9fb
Date: 2012-01-20 17:13 +0100
http://bitbucket.org/pypy/pypy/changeset/d06bbcb1c9fb/

Log:    (arigo, bivab) refactor the code used to make calls to handle more
        work in the register allocator and perform calls only using
        locations.

diff --git a/pypy/jit/backend/arm/opassembler.py b/pypy/jit/backend/arm/opassembler.py
--- a/pypy/jit/backend/arm/opassembler.py
+++ b/pypy/jit/backend/arm/opassembler.py
@@ -361,14 +361,13 @@
         self.gen_func_epilog()
         return fcond
 
-    def emit_op_call(self, op, args, regalloc, fcond,
-                                force_index=NO_FORCE_INDEX):
-        adr = args[0].value
-        arglist = op.getarglist()[1:]
+    def emit_op_call(self, op, arglocs, regalloc, fcond, force_index=NO_FORCE_INDEX):
         if force_index == NO_FORCE_INDEX:
             force_index = self.write_new_force_index()
-        cond = self._emit_call(force_index, adr, arglist,
-                                    regalloc, fcond, op.result)
+        resloc = arglocs[0]
+        adr = arglocs[1]
+        arglist = arglocs[2:]
+        cond = self._emit_call(force_index, adr, arglist, fcond, resloc)
         descr = op.getdescr()
         #XXX Hack, Hack, Hack
         if (op.result and not we_are_translated()):
@@ -379,15 +378,10 @@
             self._ensure_result_bit_extension(loc, size, signed)
         return cond
 
-    # XXX improve this interface
-    # emit_op_call_may_force
-    # XXX improve freeing of stuff here
-    # XXX add an interface that takes locations instead of boxes
-    def _emit_call(self, force_index, adr, args, regalloc, fcond=c.AL,
-                                                            result=None):
-        n_args = len(args)
-        reg_args = count_reg_args(args)
-
+    def _emit_call(self, force_index, adr, arglocs, fcond=c.AL, resloc=None):
+        assert self._regalloc.before_call_called
+        n_args = len(arglocs)
+        reg_args = count_reg_args(arglocs)
         # all arguments past the 4th go on the stack
         n = 0   # used to count the number of words pushed on the stack, so we
                 #can later modify the SP back to its original value
@@ -396,7 +390,7 @@
             stack_args = []
             count = 0
             for i in range(reg_args, n_args):
-                arg = args[i]
+                arg = arglocs[i]
                 if arg.type != FLOAT:
                     count += 1
                     n += WORD
@@ -417,8 +411,7 @@
                 if arg is None:
                     self.mc.PUSH([r.ip.value])
                 else:
-                    self.regalloc_push(regalloc.loc(arg))
-
+                    self.regalloc_push(arg)
         # collect variables that need to go in registers and the registers they
         # will be stored in
         num = 0
@@ -427,16 +420,16 @@
         non_float_regs = []
         float_locs = []
         for i in range(reg_args):
-            arg = args[i]
+            arg = arglocs[i]
             if arg.type == FLOAT and count % 2 != 0:
                     num += 1
                     count = 0
             reg = r.caller_resp[num]
 
             if arg.type == FLOAT:
-                float_locs.append((regalloc.loc(arg), reg))
+                float_locs.append((arg, reg))
             else:
-                non_float_locs.append(regalloc.loc(arg))
+                non_float_locs.append(arg)
                 non_float_regs.append(reg)
 
             if arg.type == FLOAT:
@@ -457,14 +450,12 @@
         #the actual call
         self.mc.BL(adr)
         self.mark_gc_roots(force_index)
-        regalloc.possibly_free_vars(args)
         # readjust the sp in case we passed some args on the stack
         if n > 0:
             self._adjust_sp(-n, fcond=fcond)
 
         # restore the argumets stored on the stack
-        if result is not None:
-            resloc = regalloc.after_call(result)
+        if resloc is not None:
             if resloc.is_vfp_reg():
                 # move result to the allocated register
                 self.mov_to_vfp_loc(r.r0, r.r1, resloc)
@@ -889,8 +880,8 @@
             length_box = TempInt()
             length_loc = regalloc.force_allocate_reg(length_box,
                                         forbidden_vars, selected_reg=r.r2)
-            imm = regalloc.convert_to_imm(args[4])
-            self.load(length_loc, imm)
+            immloc = regalloc.convert_to_imm(args[4])
+            self.load(length_loc, immloc)
         if is_unicode:
             bytes_box = TempPtr()
             bytes_loc = regalloc.force_allocate_reg(bytes_box,
@@ -902,8 +893,9 @@
             length_box = bytes_box
             length_loc = bytes_loc
         # call memcpy()
-        self._emit_call(NO_FORCE_INDEX, self.memcpy_addr,
-                            [dstaddr_box, srcaddr_box, length_box], regalloc)
+        regalloc.before_call()
+        self._emit_call(NO_FORCE_INDEX, imm(self.memcpy_addr),
+                            [dstaddr_loc, srcaddr_loc, length_loc])
 
         regalloc.possibly_free_var(length_box)
         regalloc.possibly_free_var(dstaddr_box)
@@ -993,17 +985,19 @@
     # XXX Split into some helper methods
     def emit_guard_call_assembler(self, op, guard_op, arglocs, regalloc,
                                                                     fcond):
+        tmploc = arglocs[1]
+        resloc = arglocs[2]
+        callargs = arglocs[3:]
+
         faildescr = guard_op.getdescr()
         fail_index = self.cpu.get_fail_descr_number(faildescr)
         self._write_fail_index(fail_index)
-
         descr = op.getdescr()
         assert isinstance(descr, JitCellToken)
-        # XXX check this
-        # assert len(arglocs) - 2 == descr.compiled_loop_token._debug_nbargs
-        resbox = TempInt()
-        self._emit_call(fail_index, descr._arm_func_addr,
-                        op.getarglist(), regalloc, fcond, result=resbox)
+        # check value
+        assert tmploc is r.r0
+        self._emit_call(fail_index, imm(descr._arm_func_addr),
+                                callargs, fcond, resloc=tmploc)
         if op.result is None:
             value = self.cpu.done_with_this_frame_void_v
         else:
@@ -1016,12 +1010,8 @@
                 value = self.cpu.done_with_this_frame_float_v
             else:
                 raise AssertionError(kind)
-        # check value
-        resloc = regalloc.try_allocate_reg(resbox)
-        assert resloc is r.r0
         self.mc.gen_load_int(r.ip.value, value)
-        self.mc.CMP_rr(resloc.value, r.ip.value)
-        regalloc.possibly_free_var(resbox)
+        self.mc.CMP_rr(tmploc.value, r.ip.value)
 
         fast_jmp_pos = self.mc.currpos()
         self.mc.BKPT()
@@ -1035,14 +1025,12 @@
         asm_helper_adr = self.cpu.cast_adr_to_int(jd.assembler_helper_adr)
         with saved_registers(self.mc, r.caller_resp[1:] + [r.ip],
                                     r.caller_vfp_resp):
-            # resbox is allready in r0
-            self.mov_loc_loc(arglocs[1], r.r1)
+            # result of previous call is in r0
+            self.mov_loc_loc(arglocs[0], r.r1)
             self.mc.BL(asm_helper_adr)
-            if op.result:
-                resloc = regalloc.after_call(op.result)
-                if resloc.is_vfp_reg():
-                    # move result to the allocated register
-                    self.mov_to_vfp_loc(r.r0, r.r1, resloc)
+            if op.result and resloc.is_vfp_reg():
+                # move result to the allocated register
+                self.mov_to_vfp_loc(r.r0, r.r1, resloc)
 
         # jump to merge point
         jmp_pos = self.mc.currpos()
@@ -1063,11 +1051,10 @@
             fielddescr = jd.vable_token_descr
             assert isinstance(fielddescr, FieldDescr)
             ofs = fielddescr.offset
-            resloc = regalloc.force_allocate_reg(resbox)
-            self.mov_loc_loc(arglocs[1], r.ip)
-            self.mc.MOV_ri(resloc.value, 0)
-            self.mc.STR_ri(resloc.value, r.ip.value, ofs)
-            regalloc.possibly_free_var(resbox)
+            tmploc = regalloc.get_scratch_reg(INT)
+            self.mov_loc_loc(arglocs[0], r.ip)
+            self.mc.MOV_ri(tmploc.value, 0)
+            self.mc.STR_ri(tmploc.value, r.ip.value, ofs)
 
         if op.result is not None:
             # load the return value from fail_boxes_xxx[0]
@@ -1080,8 +1067,6 @@
                 adr = self.fail_boxes_float.get_addr_for_num(0)
             else:
                 raise AssertionError(kind)
-            resloc = regalloc.force_allocate_reg(op.result)
-            regalloc.possibly_free_var(resbox)
             self.mc.gen_load_int(r.ip.value, adr)
             if op.result.type == FLOAT:
                 self.mc.VLDR(resloc.value, r.ip.value)
@@ -1118,14 +1103,48 @@
 
     def emit_guard_call_may_force(self, op, guard_op, arglocs, regalloc,
                                                                     fcond):
+        faildescr = guard_op.getdescr()
+        fail_index = self.cpu.get_fail_descr_number(faildescr)
+        self._write_fail_index(fail_index)
+        numargs = op.numargs()
+        callargs = arglocs[2:numargs]
+        adr = arglocs[1]
+        resloc = arglocs[0]
+        self._emit_call(fail_index, adr, callargs, fcond, resloc)
+
+        self.mc.LDR_ri(r.ip.value, r.fp.value)
+        self.mc.CMP_ri(r.ip.value, 0)
+        self._emit_guard(guard_op, arglocs[1 + numargs:], c.GE, save_exc=True)
+        return fcond
+
+    def emit_guard_call_release_gil(self, op, guard_op, arglocs, regalloc,
+                                                                    fcond):
+
+        # first, close the stack in the sense of the asmgcc GC root tracker
+        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
+        numargs = op.numargs()
+        resloc = arglocs[0]
+        adr = arglocs[1]
+        callargs = arglocs[2:numargs]
+
+        if gcrootmap:
+            self.call_release_gil(gcrootmap, arglocs, fcond)
+        # do the call
+        faildescr = guard_op.getdescr()
+        fail_index = self.cpu.get_fail_descr_number(faildescr)
+        self._write_fail_index(fail_index)
+
+        self._emit_call(fail_index, adr, callargs, fcond, resloc)
+        # then reopen the stack
+        if gcrootmap:
+            self.call_reacquire_gil(gcrootmap, resloc, fcond)
+
         self.mc.LDR_ri(r.ip.value, r.fp.value)
         self.mc.CMP_ri(r.ip.value, 0)
 
-        self._emit_guard(guard_op, arglocs, c.GE, save_exc=True)
+        self._emit_guard(guard_op, arglocs[1 + numargs:], c.GE, save_exc=True)
         return fcond
 
-    emit_guard_call_release_gil = emit_guard_call_may_force
-
     def call_release_gil(self, gcrootmap, save_registers, fcond):
         # First, we need to save away the registers listed in
         # 'save_registers' that are not callee-save.  XXX We assume that
@@ -1136,8 +1155,7 @@
                 regs_to_save.append(reg)
         assert gcrootmap.is_shadow_stack
         with saved_registers(self.mc, regs_to_save):
-            self._emit_call(NO_FORCE_INDEX, self.releasegil_addr, [],
-                                                    self._regalloc, fcond)
+            self._emit_call(NO_FORCE_INDEX, imm(self.releasegil_addr), [], fcond)
 
     def call_reacquire_gil(self, gcrootmap, save_loc, fcond):
         # save the previous result into the stack temporarily.
@@ -1154,8 +1172,7 @@
             regs_to_save.append(r.ip)  # for alingment
         assert gcrootmap.is_shadow_stack
         with saved_registers(self.mc, regs_to_save, vfp_regs_to_save):
-            self._emit_call(NO_FORCE_INDEX, self.reacqgil_addr, [],
-                                                    self._regalloc, fcond)
+            self._emit_call(NO_FORCE_INDEX, imm(self.reacqgil_addr), [], fcond)
 
     def write_new_force_index(self):
         # for shadowstack only: get a new, unused force_index number and
diff --git a/pypy/jit/backend/arm/regalloc.py b/pypy/jit/backend/arm/regalloc.py
--- a/pypy/jit/backend/arm/regalloc.py
+++ b/pypy/jit/backend/arm/regalloc.py
@@ -553,12 +553,28 @@
                 args = self.prepare_op_math_sqrt(op, fcond)
                 self.assembler.emit_op_math_sqrt(op, args, self, fcond)
                 return
-        args = [imm(rffi.cast(lltype.Signed, op.getarg(0).getint()))]
+        return self._prepare_call(op)
+
+    def _prepare_call(self, op, force_store=[], save_all_regs=False):
+        args = []
+        args.append(None)
+        for i in range(op.numargs()):
+            args.append(self.loc(op.getarg(i)))
+        # spill variables that need to be saved around calls
+        self.vfprm.before_call(save_all_regs=save_all_regs)
+        if not save_all_regs:
+            gcrootmap = self.assembler.cpu.gc_ll_descr.gcrootmap
+            if gcrootmap and gcrootmap.is_shadow_stack:
+                save_all_regs = 2
+        self.rm.before_call(save_all_regs=save_all_regs)
+        if op.result:
+            resloc = self.after_call(op.result)
+            args[0] = resloc
+        self.before_call_called = True
         return args
 
     def prepare_op_call_malloc_gc(self, op, fcond):
-        args = [imm(rffi.cast(lltype.Signed, op.getarg(0).getint()))]
-        return args
+        return self._prepare_call(op)
 
     def _prepare_guard(self, op, args=None):
         if args is None:
@@ -1033,58 +1049,25 @@
             self._compute_hint_frame_locations_from_descr(descr)
 
     def prepare_guard_call_may_force(self, op, guard_op, fcond):
-        faildescr = guard_op.getdescr()
-        fail_index = self.cpu.get_fail_descr_number(faildescr)
-        self.assembler._write_fail_index(fail_index)
-        args = [imm(rffi.cast(lltype.Signed, op.getarg(0).getint()))]
-        for v in guard_op.getfailargs():
-            if v in self.rm.reg_bindings or v in self.vfprm.reg_bindings:
-                self.force_spill_var(v)
-        self.assembler.emit_op_call(op, args, self, fcond, fail_index)
-        locs = self._prepare_guard(guard_op)
-        self.possibly_free_vars(guard_op.getfailargs())
-        return locs
-
-    def prepare_guard_call_release_gil(self, op, guard_op, fcond):
-        # first, close the stack in the sense of the asmgcc GC root tracker
-        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
-        if gcrootmap:
-            arglocs = []
-            args = op.getarglist()
-            for i in range(op.numargs()):
-                loc = self._ensure_value_is_boxed(op.getarg(i), args)
-                arglocs.append(loc)
-            self.assembler.call_release_gil(gcrootmap, arglocs, fcond)
-        # do the call
-        faildescr = guard_op.getdescr()
-        fail_index = self.cpu.get_fail_descr_number(faildescr)
-        self.assembler._write_fail_index(fail_index)
-        args = [imm(rffi.cast(lltype.Signed, op.getarg(0).getint()))]
-        self.assembler.emit_op_call(op, args, self, fcond, fail_index)
-        # then reopen the stack
-        if gcrootmap:
-            if op.result:
-                result_loc = self.call_result_location(op.result)
-            else:
-                result_loc = None
-            self.assembler.call_reacquire_gil(gcrootmap, result_loc, fcond)
-        locs = self._prepare_guard(guard_op)
-        return locs
+        args = self._prepare_call(op, save_all_regs=True)
+        return self._prepare_guard(guard_op, args)
+    prepare_guard_call_release_gil = prepare_guard_call_may_force
 
     def prepare_guard_call_assembler(self, op, guard_op, fcond):
         descr = op.getdescr()
         assert isinstance(descr, JitCellToken)
         jd = descr.outermost_jitdriver_sd
         assert jd is not None
-        size = jd.portal_calldescr.get_result_size()
         vable_index = jd.index_of_virtualizable
         if vable_index >= 0:
             self._sync_var(op.getarg(vable_index))
             vable = self.frame_manager.loc(op.getarg(vable_index))
         else:
             vable = imm(0)
+        # make sure the call result location is free
+        tmploc = self.get_scratch_reg(INT, selected_reg=r.r0)
         self.possibly_free_vars(guard_op.getfailargs())
-        return [imm(size), vable]
+        return [vable, tmploc] + self._prepare_call(op, save_all_regs=True)
 
     def _prepare_args_for_new_op(self, new_args):
         gc_ll_descr = self.cpu.gc_ll_descr
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to