Author: hager <[email protected]>
Branch: ppc-jit-backend
Changeset: r53450:11155b0995aa
Date: 2012-03-13 10:34 -0700
http://bitbucket.org/pypy/pypy/changeset/11155b0995aa/
Log: (bivab, hager): larger refactoring around emit_call to achieve a
	clearer separation of concerns
diff --git a/pypy/jit/backend/ppc/opassembler.py
b/pypy/jit/backend/ppc/opassembler.py
--- a/pypy/jit/backend/ppc/opassembler.py
+++ b/pypy/jit/backend/ppc/opassembler.py
@@ -2,8 +2,10 @@
gen_emit_unary_cmp_op)
import pypy.jit.backend.ppc.condition as c
import pypy.jit.backend.ppc.register as r
+from pypy.jit.backend.ppc.locations import imm
+from pypy.jit.backend.ppc.locations import imm as make_imm_loc
from pypy.jit.backend.ppc.arch import (IS_PPC_32, WORD, BACKCHAIN_SIZE,
- MAX_REG_PARAMS)
+ MAX_REG_PARAMS, FORCE_INDEX_OFS)
from pypy.jit.metainterp.history import (JitCellToken, TargetToken, Box,
AbstractFailDescr, FLOAT, INT, REF)
@@ -373,24 +375,25 @@
self.mc.store(r.SCRATCH.value, loc.value, 0)
self.mc.store(r.SCRATCH.value, loc1.value, 0)
- def emit_call(self, op, args, regalloc, force_index=-1):
- adr = args[0].value
- arglist = op.getarglist()[1:]
- if force_index == -1:
+ def emit_call(self, op, arglocs, regalloc, force_index=NO_FORCE_INDEX):
+ if force_index == NO_FORCE_INDEX:
force_index = self.write_new_force_index()
- self._emit_call(force_index, adr, arglist, regalloc, op.result)
+ resloc = arglocs[0]
+ adr = arglocs[1]
+ arglist = arglocs[2:]
+ self._emit_call(force_index, adr, arglist, resloc)
descr = op.getdescr()
#XXX Hack, Hack, Hack
- if op.result and not we_are_translated():
+ if (op.result and not we_are_translated()):
#XXX check result type
loc = regalloc.rm.call_result_location(op.result)
size = descr.get_result_size()
signed = descr.is_result_signed()
self._ensure_result_bit_extension(loc, size, signed)
- def _emit_call(self, force_index, adr, args, regalloc, result=None):
- n_args = len(args)
- reg_args = count_reg_args(args)
+ def _emit_call(self, force_index, adr, arglocs, result=None):
+ n_args = len(arglocs)
+ reg_args = count_reg_args(arglocs)
n = 0 # used to count the number of words pushed on the stack, so we
# can later modify the SP back to its original value
@@ -399,7 +402,7 @@
# first we need to prepare the list so it stays aligned
count = 0
for i in range(reg_args, n_args):
- arg = args[i]
+ arg = arglocs[i]
if arg.type == FLOAT:
assert 0, "not implemented yet"
else:
@@ -424,7 +427,7 @@
for i, arg in enumerate(stack_args):
offset = param_offset + i * WORD
if arg is not None:
- self.regalloc_mov(regalloc.loc(arg), r.SCRATCH)
+ self.regalloc_mov(arg, r.SCRATCH)
self.mc.store(r.SCRATCH.value, r.SP.value, offset)
# collect variables that need to go in registers
@@ -434,7 +437,7 @@
non_float_locs = []
non_float_regs = []
for i in range(reg_args):
- arg = args[i]
+ arg = arglocs[i]
if arg.type == FLOAT and count % 2 != 0:
assert 0, "not implemented yet"
reg = r.PARAM_REGS[num]
@@ -442,7 +445,7 @@
if arg.type == FLOAT:
assert 0, "not implemented yet"
else:
- non_float_locs.append(regalloc.loc(arg))
+ non_float_locs.append(arg)
non_float_regs.append(reg)
if arg.type == FLOAT:
@@ -451,22 +454,26 @@
num += 1
count += 1
- # spill variables that need to be saved around calls
- regalloc.before_call(save_all_regs=2)
+ if adr in non_float_regs:
+ non_float_locs.append(adr)
+ non_float_regs.append(r.r11)
+ adr = r.r11
# remap values stored in core registers
remap_frame_layout(self, non_float_locs, non_float_regs, r.SCRATCH)
# the actual call
- self.mc.call(adr)
+ if adr.is_imm():
+ self.mc.call(adr.value)
+ elif adr.is_stack():
+ assert 0, "not implemented yet"
+ elif adr.is_reg():
+ self.mc.call_register(adr)
+ else:
+ assert 0, "should not reach here"
self.mark_gc_roots(force_index)
- # restore the arguments stored on the stack
- if result is not None:
- regalloc.after_call(result)
-
-
class FieldOpAssembler(object):
_mixin_ = True
@@ -754,8 +761,10 @@
length_box = bytes_box
length_loc = bytes_loc
# call memcpy()
- self._emit_call(NO_FORCE_INDEX, self.memcpy_addr,
- [dstaddr_box, srcaddr_box, length_box], regalloc)
+ regalloc.before_call()
+ imm_addr = make_imm_loc(self.memcpy_addr)
+ self._emit_call(NO_FORCE_INDEX, imm_addr,
+ [dstaddr_loc, srcaddr_loc, length_loc])
regalloc.possibly_free_var(length_box)
regalloc.possibly_free_var(dstaddr_box)
@@ -1031,22 +1040,27 @@
def emit_force_token(self, op, arglocs, regalloc):
res_loc = arglocs[0]
+ ENCODING_AREA = len(r.MANAGED_REGS) * WORD
self.mc.mr(res_loc.value, r.SPP.value)
+ self.mc.addi(res_loc.value, res_loc.value, ENCODING_AREA)
+ # self._emit_guard(guard_op, regalloc._prepare_guard(guard_op), c.LT)
# from: ../x86/assembler.py:1668
# XXX Split into some helper methods
def emit_guard_call_assembler(self, op, guard_op, arglocs, regalloc):
+ tmploc = arglocs[1]
+ resloc = arglocs[2]
+ callargs = arglocs[3:]
+
faildescr = guard_op.getdescr()
fail_index = self.cpu.get_fail_descr_number(faildescr)
self._write_fail_index(fail_index)
-
descr = op.getdescr()
assert isinstance(descr, JitCellToken)
- # XXX check this
- #assert op.numargs() == len(descr._ppc_arglocs[0])
- resbox = TempInt()
- self._emit_call(fail_index, descr._ppc_func_addr, op.getarglist(),
- regalloc, result=resbox)
+ # check value
+ assert tmploc is r.RES
+ self._emit_call(fail_index, imm(descr._ppc_func_addr),
+ callargs, result=tmploc)
if op.result is None:
value = self.cpu.done_with_this_frame_void_v
else:
@@ -1056,61 +1070,40 @@
elif kind == REF:
value = self.cpu.done_with_this_frame_ref_v
elif kind == FLOAT:
- assert 0, "not implemented yet"
+ value = self.cpu.done_with_this_frame_float_v
else:
raise AssertionError(kind)
- # check value
- resloc = regalloc.try_allocate_reg(resbox)
- assert resloc is r.RES
+
+ # take fast path on equality
+ # => jump on inequality
with scratch_reg(self.mc):
self.mc.load_imm(r.SCRATCH, value)
- self.mc.cmp_op(0, resloc.value, r.SCRATCH.value)
- regalloc.possibly_free_var(resbox)
+ self.mc.cmp_op(0, tmploc.value, r.SCRATCH.value)
- fast_jmp_pos = self.mc.currpos()
- self.mc.nop()
-
- # Path A: use assembler helper
- # if values are equal we take the fast path
+ #if values are equal we take the fast path
# Slow path, calling helper
# jump to merge point
+
jd = descr.outermost_jitdriver_sd
assert jd is not None
- asm_helper_adr = self.cpu.cast_adr_to_int(jd.assembler_helper_adr)
- # do call to helper function
- self.mov_loc_loc(arglocs[1], r.r4)
- self.mc.call(asm_helper_adr)
+ # Path A: load return value and reset token
+ # Fast Path using result boxes
- if op.result:
- resloc = regalloc.after_call(op.result)
- if resloc.is_vfp_reg():
- assert 0, "not implemented yet"
-
- # jump to merge point
- jmp_pos = self.mc.currpos()
+ fast_jump_pos = self.mc.currpos()
self.mc.nop()
- # Path B: load return value and reset token
- # Fast Path using result boxes
- # patch the jump to the fast path
- offset = self.mc.currpos() - fast_jmp_pos
- pmc = OverwritingBuilder(self.mc, fast_jmp_pos, 1)
- # 12 and 2 mean: jump if the 3rd bit in CR is set
- pmc.bc(12, 2, offset)
- pmc.overwrite()
-
# Reset the vable token --- XXX really too much special logic here:-(
if jd.index_of_virtualizable >= 0:
from pypy.jit.backend.llsupport.descr import FieldDescr
fielddescr = jd.vable_token_descr
assert isinstance(fielddescr, FieldDescr)
- resloc = regalloc.force_allocate_reg(resbox)
+ ofs = fielddescr.offset
+ tmploc = regalloc.get_scratch_reg(INT)
with scratch_reg(self.mc):
- self.mov_loc_loc(arglocs[1], r.SCRATCH)
- self.mc.li(resloc.value, 0)
- self.mc.storex(resloc.value, 0, r.SCRATCH.value)
- regalloc.possibly_free_var(resbox)
+ self.mov_loc_loc(arglocs[0], r.SCRATCH)
+ self.mc.li(tmploc.value, 0)
+ self.mc.storex(tmploc.value, 0, r.SCRATCH.value)
if op.result is not None:
# load the return value from fail_boxes_xxx[0]
@@ -1120,11 +1113,9 @@
elif kind == REF:
adr = self.fail_boxes_ptr.get_addr_for_num(0)
elif kind == FLOAT:
- assert 0, "not implemented yet"
+ assert 0, "not implemented"
else:
raise AssertionError(kind)
- resloc = regalloc.force_allocate_reg(op.result)
- regalloc.possibly_free_var(resbox)
with scratch_reg(self.mc):
self.mc.load_imm(r.SCRATCH, adr)
if op.result.type == FLOAT:
@@ -1132,18 +1123,40 @@
else:
self.mc.loadx(resloc.value, 0, r.SCRATCH.value)
+ # jump to merge point, patched later
+ fast_path_to_end_jump_pos = self.mc.currpos()
+ self.mc.nop()
+
+ jmp_pos = self.mc.currpos()
+ pmc = OverwritingBuilder(self.mc, fast_jump_pos, 1)
+ pmc.bc(4, 2, jmp_pos - fast_jump_pos)
+ pmc.overwrite()
+
+ # Path B: use assembler helper
+ asm_helper_adr = self.cpu.cast_adr_to_int(jd.assembler_helper_adr)
+ if self.cpu.supports_floats:
+ assert 0, "not implemented yet"
+
+ with Saved_Volatiles(self.mc, save_RES=False):
+ # result of previous call is in r3
+ self.mov_loc_loc(arglocs[0], r.r4)
+ self.mc.call(asm_helper_adr)
+ if op.result and resloc.is_vfp_reg():
+ assert 0, "not implemented yet"
+
# merge point
- offset = self.mc.currpos() - jmp_pos
- if offset >= 0:
- pmc = OverwritingBuilder(self.mc, jmp_pos, 1)
- pmc.b(offset)
- pmc.overwrite()
+ currpos = self.mc.currpos()
+ pmc = OverwritingBuilder(self.mc, fast_path_to_end_jump_pos, 1)
+ pmc.b(currpos - fast_path_to_end_jump_pos)
+ pmc.overwrite()
+ ENCODING_AREA = len(r.MANAGED_REGS) * WORD
with scratch_reg(self.mc):
- self.mc.load(r.SCRATCH.value, r.SPP.value, 0)
+ self.mc.load(r.SCRATCH.value, r.SPP.value, ENCODING_AREA)
self.mc.cmp_op(0, r.SCRATCH.value, 0, imm=True)
- self._emit_guard(guard_op, regalloc._prepare_guard(guard_op), c.LT)
+ self._emit_guard(guard_op, regalloc._prepare_guard(guard_op),
+ c.LT, save_exc=True)
# ../x86/assembler.py:668
def redirect_call_assembler(self, oldlooptoken, newlooptoken):
@@ -1169,21 +1182,56 @@
odata[0] = tdata[0]
def emit_guard_call_may_force(self, op, guard_op, arglocs, regalloc):
- ENCODING_AREA = len(r.MANAGED_REGS) * WORD
+ faildescr = guard_op.getdescr()
+ fail_index = self.cpu.get_fail_descr_number(faildescr)
+ self._write_fail_index(fail_index)
+ numargs = op.numargs()
+ callargs = arglocs[2:numargs + 1] # extract the arguments to the call
+ adr = arglocs[1]
+ resloc = arglocs[0]
+ self._emit_call(fail_index, adr, callargs, resloc)
+
with scratch_reg(self.mc):
- self.mc.load(r.SCRATCH.value, r.SPP.value, ENCODING_AREA)
+ self.mc.load(r.SCRATCH.value, r.SPP.value, FORCE_INDEX_OFS)
self.mc.cmp_op(0, r.SCRATCH.value, 0, imm=True)
- self._emit_guard(guard_op, arglocs, c.LT, save_exc=True)
- emit_guard_call_release_gil = emit_guard_call_may_force
+ self._emit_guard(guard_op, arglocs[1 + numargs:], c.LT, save_exc=True)
+
+ def emit_guard_call_release_gil(self, op, guard_op, arglocs, regalloc):
+
+ # first, close the stack in the sense of the asmgcc GC root tracker
+ gcrootmap = self.cpu.gc_ll_descr.gcrootmap
+ numargs = op.numargs()
+ callargs = arglocs[2:numargs + 1] # extract the arguments to the call
+ adr = arglocs[1]
+ resloc = arglocs[0]
+
+ if gcrootmap:
+ self.call_release_gil(gcrootmap, arglocs)
+ # do the call
+ faildescr = guard_op.getdescr()
+ fail_index = self.cpu.get_fail_descr_number(faildescr)
+ self._write_fail_index(fail_index)
+
+ self._emit_call(fail_index, adr, callargs, resloc)
+ # then reopen the stack
+ if gcrootmap:
+ self.call_reacquire_gil(gcrootmap, resloc)
+
+ with scratch_reg(self.mc):
+ self.mc.load(r.SCRATCH.value, r.SPP.value, 0)
+ self.mc.cmp_op(0, r.SCRATCH.value, 0, imm=True)
+
+ self._emit_guard(guard_op, arglocs[1 + numargs:], c.LT, save_exc=True)
def call_release_gil(self, gcrootmap, save_registers):
# XXX don't know whether this is correct
# XXX use save_registers here
assert gcrootmap.is_shadow_stack
with Saved_Volatiles(self.mc):
- self._emit_call(NO_FORCE_INDEX, self.releasegil_addr,
- [], self._regalloc)
+ #self._emit_call(NO_FORCE_INDEX, self.releasegil_addr,
+ # [], self._regalloc)
+ self._emit_call(NO_FORCE_INDEX, imm(self.releasegil_addr), [])
def call_reacquire_gil(self, gcrootmap, save_loc):
# save the previous result into the stack temporarily.
@@ -1191,8 +1239,7 @@
# to save vfp regs in this case. Besides the result location
assert gcrootmap.is_shadow_stack
with Saved_Volatiles(self.mc):
- self._emit_call(NO_FORCE_INDEX, self.reacqgil_addr,
- [], self._regalloc)
+ self._emit_call(NO_FORCE_INDEX, imm(self.reacqgil_addr), [])
class OpAssembler(IntOpAssembler, GuardOpAssembler,
diff --git a/pypy/jit/backend/ppc/ppc_assembler.py
b/pypy/jit/backend/ppc/ppc_assembler.py
--- a/pypy/jit/backend/ppc/ppc_assembler.py
+++ b/pypy/jit/backend/ppc/ppc_assembler.py
@@ -8,7 +8,8 @@
GPR_SAVE_AREA, BACKCHAIN_SIZE,
FPR_SAVE_AREA,
FLOAT_INT_CONVERSION,
FORCE_INDEX,
- SIZE_LOAD_IMM_PATCH_SP)
+ SIZE_LOAD_IMM_PATCH_SP,
+ FORCE_INDEX_OFS)
from pypy.jit.backend.ppc.helper.assembler import Saved_Volatiles
from pypy.jit.backend.ppc.helper.regalloc import _check_imm_arg
import pypy.jit.backend.ppc.register as r
@@ -196,7 +197,7 @@
to the failboxes. Values for spilled vars and registers are stored on
stack at frame_loc """
assert spp & 1 == 0
- self.fail_force_index = spp
+ self.fail_force_index = spp + FORCE_INDEX_OFS
bytecode = rffi.cast(rffi.UCHARP, mem_loc)
num = 0
value = 0
@@ -808,11 +809,15 @@
elif self.can_merge_with_next_guard(op, pos, operations)\
and opnum in (rop.CALL_RELEASE_GIL, rop.CALL_ASSEMBLER,\
rop.CALL_MAY_FORCE): # XXX fix
+ guard = operations[pos + 1]
+ assert guard.is_guard()
+ arglocs = regalloc.operations_with_guard[opnum](regalloc, op,
+ guard)
+ operations_with_guard[opnum](self, op,
+ guard, arglocs, regalloc)
regalloc.next_instruction()
- arglocs = regalloc.operations_with_guard[opnum](regalloc, op,
- operations[pos+1])
- operations_with_guard[opnum](self, op,
- operations[pos+1], arglocs, regalloc)
+ regalloc.possibly_free_vars_for_op(guard)
+ regalloc.possibly_free_vars(guard.getfailargs())
elif not we_are_translated() and op.getopnum() == -124:
regalloc.prepare_force_spill(op)
else:
diff --git a/pypy/jit/backend/ppc/regalloc.py b/pypy/jit/backend/ppc/regalloc.py
--- a/pypy/jit/backend/ppc/regalloc.py
+++ b/pypy/jit/backend/ppc/regalloc.py
@@ -384,8 +384,7 @@
return args
def prepare_call_malloc_gc(self, op):
- args = [imm(rffi.cast(lltype.Signed, op.getarg(0).getint()))]
- return args
+ return self._prepare_call(op)
def _prepare_guard(self, op, args=None):
if args is None:
@@ -498,33 +497,6 @@
if loc is not None and loc.is_stack():
self.frame_manager.hint_frame_locations[box] = loc
- def prepare_guard_call_release_gil(self, op, guard_op):
- # first, close the stack in the sense of the asmgcc GC root tracker
- gcrootmap = self.cpu.gc_ll_descr.gcrootmap
- if gcrootmap:
- arglocs = []
- args = op.getarglist()
- for i in range(op.numargs()):
- loc = self._ensure_value_is_boxed(op.getarg(i), args)
- arglocs.append(loc)
- self.assembler.call_release_gil(gcrootmap, arglocs)
- # do the call
- faildescr = guard_op.getdescr()
- fail_index = self.cpu.get_fail_descr_number(faildescr)
- self.assembler._write_fail_index(fail_index)
- args = [imm(rffi.cast(lltype.Signed, op.getarg(0).getint()))]
- self.assembler.emit_call(op, args, self, fail_index)
- # then reopen the stack
- if gcrootmap:
- if op.result:
- result_loc = self.call_result_location(op.result)
- else:
- result_loc = None
- self.assembler.call_reacquire_gil(gcrootmap, result_loc)
- locs = self._prepare_guard(guard_op)
- self.possibly_free_vars(guard_op.getfailargs())
- return locs
-
def prepare_jump(self, op):
descr = op.getdescr()
assert isinstance(descr, TargetToken)
@@ -765,14 +737,25 @@
def prepare_call(self, op):
effectinfo = op.getdescr().get_extra_info()
if effectinfo is not None:
- # XXX TODO
- #oopspecindex = effectinfo.oopspecindex
- #if oopspecindex == EffectInfo.OS_MATH_SQRT:
- # args = self.prepare_op_math_sqrt(op, fcond)
- # self.assembler.emit_op_math_sqrt(op, args, self, fcond)
- # return
+ # XXX TODO
pass
- args = [imm(rffi.cast(lltype.Signed, op.getarg(0).getint()))]
+ return self._prepare_call(op)
+
+ def _prepare_call(self, op, force_store=[], save_all_regs=False):
+ args = []
+ args.append(None)
+ for i in range(op.numargs()):
+ args.append(self.loc(op.getarg(i)))
+ # spill variables that need to be saved around calls
+ if not save_all_regs:
+ gcrootmap = self.assembler.cpu.gc_ll_descr.gcrootmap
+ if gcrootmap and gcrootmap.is_shadow_stack:
+ save_all_regs = 2
+ self.rm.before_call(save_all_regs=save_all_regs)
+ if op.result:
+ resloc = self.after_call(op.result)
+ args[0] = resloc
+ self.before_call_called = True
return args
def prepare_call_malloc_nursery(self, op):
@@ -883,33 +866,26 @@
self._compute_hint_frame_locations_from_descr(descr)
def prepare_guard_call_may_force(self, op, guard_op):
- faildescr = guard_op.getdescr()
- fail_index = self.cpu.get_fail_descr_number(faildescr)
- self.assembler._write_fail_index(fail_index)
- args = [imm(rffi.cast(lltype.Signed, op.getarg(0).getint()))]
- for v in guard_op.getfailargs():
- if v in self.rm.reg_bindings:
- self.force_spill_var(v)
- self.assembler.emit_call(op, args, self, fail_index)
- locs = self._prepare_guard(guard_op)
- self.possibly_free_vars(guard_op.getfailargs())
- return locs
+ args = self._prepare_call(op, save_all_regs=True)
+ return self._prepare_guard(guard_op, args)
+ prepare_guard_call_release_gil = prepare_guard_call_may_force
+
def prepare_guard_call_assembler(self, op, guard_op):
descr = op.getdescr()
assert isinstance(descr, JitCellToken)
jd = descr.outermost_jitdriver_sd
assert jd is not None
- #size =
jd.portal_calldescr.get_result_size(self.cpu.translate_support_code)
- size = jd.portal_calldescr.get_result_size()
vable_index = jd.index_of_virtualizable
if vable_index >= 0:
self._sync_var(op.getarg(vable_index))
vable = self.frame_manager.loc(op.getarg(vable_index))
else:
vable = imm(0)
+ # make sure the call result location is free
+ tmploc = self.get_scratch_reg(INT, selected_reg=r.RES)
self.possibly_free_vars(guard_op.getfailargs())
- return [imm(size), vable]
+ return [vable, tmploc] + self._prepare_call(op, save_all_regs=True)
def _prepare_args_for_new_op(self, new_args):
gc_ll_descr = self.cpu.gc_ll_descr
diff --git a/pypy/jit/backend/ppc/runner.py b/pypy/jit/backend/ppc/runner.py
--- a/pypy/jit/backend/ppc/runner.py
+++ b/pypy/jit/backend/ppc/runner.py
@@ -87,10 +87,10 @@
len(r.MANAGED_REGS),
flavor='raw', zero=True, immortal=True)
- def force(self, spilling_pointer):
+ def force(self, addr_of_force_index):
TP = rffi.CArrayPtr(lltype.Signed)
- addr_of_force_index = spilling_pointer + len(r.MANAGED_REGS) * WORD
+ spilling_pointer = addr_of_force_index - FORCE_INDEX_OFS
fail_index = rffi.cast(TP, addr_of_force_index)[0]
assert fail_index >= 0, "already forced!"
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit