Author: Maciej Fijalkowski <[email protected]>
Branch: backend-vector-ops
Changeset: r52016:f33339bbc410
Date: 2012-02-01 15:52 +0200
http://bitbucket.org/pypy/pypy/changeset/f33339bbc410/
Log: implement spilling. A bit of fun with alignment
diff --git a/pypy/jit/backend/llsupport/regalloc.py b/pypy/jit/backend/llsupport/regalloc.py
--- a/pypy/jit/backend/llsupport/regalloc.py
+++ b/pypy/jit/backend/llsupport/regalloc.py
@@ -20,8 +20,6 @@
self.used = [] # list of bools
self.hint_frame_locations = {}
- frame_depth = property(lambda:xxx, lambda:xxx) # XXX kill me
-
def get_frame_depth(self):
return len(self.used)
@@ -45,7 +43,7 @@
return self.get_new_loc(box)
def get_new_loc(self, box):
- size = self.frame_size(box.type)
+ size = self.frame_size(box)
# frame_depth is rounded up to a multiple of 'size', assuming
# that 'size' is a power of two. The reason for doing so is to
# avoid obscure issues in jump.py with stack locations that try
@@ -54,7 +52,7 @@
self.used.append(False)
#
index = self.get_frame_depth()
- newloc = self.frame_pos(index, box.type)
+ newloc = self.frame_pos(index, box)
for i in range(size):
self.used.append(True)
#
@@ -71,7 +69,7 @@
index = self.get_loc_index(loc)
if index < 0:
return
- endindex = index + self.frame_size(box.type)
+ endindex = index + self.frame_size(box)
while len(self.used) < endindex:
self.used.append(False)
while index < endindex:
@@ -91,7 +89,7 @@
return # already gone
del self.bindings[box]
#
- size = self.frame_size(box.type)
+ size = self.frame_size(box)
baseindex = self.get_loc_index(loc)
if baseindex < 0:
return
@@ -104,7 +102,7 @@
index = self.get_loc_index(loc)
if index < 0:
return False
- size = self.frame_size(box.type)
+ size = self.frame_size(box)
for i in range(size):
while (index + i) >= len(self.used):
self.used.append(False)
@@ -118,10 +116,10 @@
# abstract methods that need to be overwritten for specific assemblers
@staticmethod
- def frame_pos(loc, type):
+ def frame_pos(loc, box):
raise NotImplementedError("Purely abstract")
@staticmethod
- def frame_size(type):
+ def frame_size(box):
return 1
@staticmethod
def get_loc_index(loc):
@@ -256,7 +254,7 @@
del self.reg_bindings[v_to_spill]
if self.frame_manager.get(v_to_spill) is None:
newloc = self.frame_manager.loc(v_to_spill)
- self.assembler.regalloc_mov(loc, newloc)
+ self.assembler.regalloc_mov(v_to_spill, loc, newloc)
return loc
def _pick_variable_to_spill(self, v, forbidden_vars, selected_reg=None,
@@ -343,11 +341,11 @@
immloc = self.convert_to_imm(v)
if selected_reg:
if selected_reg in self.free_regs:
- self.assembler.regalloc_mov(immloc, selected_reg)
+ self.assembler.regalloc_mov(v, immloc, selected_reg)
return selected_reg
loc = self._spill_var(v, forbidden_vars, selected_reg)
self.free_regs.append(loc)
- self.assembler.regalloc_mov(immloc, loc)
+ self.assembler.regalloc_mov(v, immloc, loc)
return loc
return immloc
@@ -366,7 +364,7 @@
loc = self.force_allocate_reg(v, forbidden_vars, selected_reg,
need_lower_byte=need_lower_byte)
if prev_loc is not loc:
- self.assembler.regalloc_mov(prev_loc, loc)
+ self.assembler.regalloc_mov(v, prev_loc, loc)
return loc
def _reallocate_from_to(self, from_v, to_v):
@@ -378,10 +376,10 @@
if self.free_regs:
loc = self.free_regs.pop()
self.reg_bindings[v] = loc
- self.assembler.regalloc_mov(prev_loc, loc)
+ self.assembler.regalloc_mov(v, prev_loc, loc)
else:
loc = self.frame_manager.loc(v)
- self.assembler.regalloc_mov(prev_loc, loc)
+ self.assembler.regalloc_mov(v, prev_loc, loc)
def force_result_in_reg(self, result_v, v, forbidden_vars=[]):
""" Make sure that result is in the same register as v.
@@ -395,13 +393,13 @@
loc = self.free_regs.pop()
else:
loc = self._spill_var(v, forbidden_vars, None)
- self.assembler.regalloc_mov(self.convert_to_imm(v), loc)
+ self.assembler.regalloc_mov(v, self.convert_to_imm(v), loc)
self.reg_bindings[result_v] = loc
return loc
if v not in self.reg_bindings:
prev_loc = self.frame_manager.loc(v)
loc = self.force_allocate_reg(v, forbidden_vars)
- self.assembler.regalloc_mov(prev_loc, loc)
+ self.assembler.regalloc_mov(v, prev_loc, loc)
assert v in self.reg_bindings
if self.longevity[v][1] > self.position:
# we need to find a new place for variable v and
@@ -420,7 +418,7 @@
if not self.frame_manager.get(v):
reg = self.reg_bindings[v]
to = self.frame_manager.loc(v)
- self.assembler.regalloc_mov(reg, to)
+ self.assembler.regalloc_mov(v, reg, to)
# otherwise it's clean
def before_call(self, force_store=[], save_all_regs=0):
diff --git a/pypy/jit/backend/test/runner_test.py b/pypy/jit/backend/test/runner_test.py
--- a/pypy/jit/backend/test/runner_test.py
+++ b/pypy/jit/backend/test/runner_test.py
@@ -3164,6 +3164,7 @@
assert a[0] == 26
assert a[1] == 30
lltype.free(a, flavor='raw')
+
class OOtypeBackendTest(BaseBackendTest):
diff --git a/pypy/jit/backend/x86/assembler.py b/pypy/jit/backend/x86/assembler.py
--- a/pypy/jit/backend/x86/assembler.py
+++ b/pypy/jit/backend/x86/assembler.py
@@ -1,7 +1,7 @@
import sys, os
from pypy.jit.backend.llsupport import symbolic
from pypy.jit.backend.llsupport.asmmemmgr import MachineDataBlockWrapper
-from pypy.jit.metainterp.history import Const, Box, BoxInt, ConstInt
+from pypy.jit.metainterp.history import Const, Box, BoxInt, ConstInt, BoxVector
from pypy.jit.metainterp.history import AbstractFailDescr, INT, REF, FLOAT
from pypy.jit.metainterp.history import JitCellToken
from pypy.rpython.lltypesystem import lltype, rffi, rstr, llmemory
@@ -833,8 +833,10 @@
# ------------------------------------------------------------
- def mov(self, from_loc, to_loc):
- if (isinstance(from_loc, RegLoc) and from_loc.is_xmm) or (isinstance(to_loc, RegLoc) and to_loc.is_xmm):
+ def mov(self, box, from_loc, to_loc):
+ if isinstance(box, BoxVector):
+ self.mc.MOVDQU(to_loc, from_loc)
+ elif (isinstance(from_loc, RegLoc) and from_loc.is_xmm) or (isinstance(to_loc, RegLoc) and to_loc.is_xmm):
self.mc.MOVSD(to_loc, from_loc)
else:
assert to_loc is not ebp
@@ -1285,7 +1287,7 @@
self.mc.MOVZX8(resloc, rl)
def genop_same_as(self, op, arglocs, resloc):
- self.mov(arglocs[0], resloc)
+ self.mov(op.getarg(0), arglocs[0], resloc)
genop_cast_ptr_to_int = genop_same_as
genop_cast_int_to_ptr = genop_same_as
diff --git a/pypy/jit/backend/x86/regalloc.py b/pypy/jit/backend/x86/regalloc.py
--- a/pypy/jit/backend/x86/regalloc.py
+++ b/pypy/jit/backend/x86/regalloc.py
@@ -5,7 +5,7 @@
import os
from pypy.jit.metainterp.history import (Box, Const, ConstInt, ConstPtr,
ResOperation, BoxPtr, ConstFloat,
- BoxFloat, INT, REF, FLOAT,
+ BoxFloat, INT, REF, FLOAT, BoxVector,
TargetToken, JitCellToken)
from pypy.jit.backend.x86.regloc import *
from pypy.rpython.lltypesystem import lltype, rffi, rstr
@@ -128,17 +128,25 @@
class X86FrameManager(FrameManager):
@staticmethod
- def frame_pos(i, box_type):
- if IS_X86_32 and box_type == FLOAT:
- return StackLoc(i, get_ebp_ofs(i+1), box_type)
- else:
- return StackLoc(i, get_ebp_ofs(i), box_type)
+ def frame_pos(i, box):
+ assert isinstance(box, Box)
+ if isinstance(box, BoxVector):
+ if IS_X86_32:
+ return StackLoc(i, get_ebp_ofs(i + 3), box.type)
+ return StackLoc(i, get_ebp_ofs(i + 1), box.type)
+ if IS_X86_32 and box.type == FLOAT:
+ return StackLoc(i, get_ebp_ofs(i+1), box.type)
+ return StackLoc(i, get_ebp_ofs(i), box.type)
@staticmethod
- def frame_size(box_type):
- if IS_X86_32 and box_type == FLOAT:
+ def frame_size(box):
+ assert isinstance(box, Box)
+ if isinstance(box, BoxVector):
+ if IS_X86_32:
+ return 4
return 2
- else:
- return 1
+ if IS_X86_32 and box.type == FLOAT:
+ return 2
+ return 1
@staticmethod
def get_loc_index(loc):
assert isinstance(loc, StackLoc)
@@ -370,7 +378,10 @@
self.assembler.regalloc_perform_math(op, arglocs, result_loc)
def locs_for_fail(self, guard_op):
- return [self.loc(v) for v in guard_op.getfailargs()]
+ failargs = guard_op.getfailargs()
+ for arg in failargs:
+ assert not isinstance(arg, BoxVector)
+ return [self.loc(v) for v in failargs]
def get_current_depth(self):
# return (self.fm.frame_depth, self.param_depth), but trying to share
@@ -701,11 +712,19 @@
self.xrm.possibly_free_vars_for_op(op)
consider_float_add = _consider_float_op
- consider_float_vector_add = _consider_float_op
consider_float_sub = _consider_float_op
consider_float_mul = _consider_float_op
consider_float_truediv = _consider_float_op
+ def _consider_float_vector_op(self, op):
+ loc1 = self.xrm.make_sure_var_in_reg(op.getarg(1))
+ args = op.getarglist()
+ loc0 = self.xrm.force_result_in_reg(op.result, op.getarg(0), args)
+ self.Perform(op, [loc0, loc1], loc0)
+ self.xrm.possibly_free_vars_for_op(op)
+
+ consider_float_vector_add = _consider_float_vector_op
+
def _consider_float_cmp(self, op, guard_op):
vx = op.getarg(0)
vy = op.getarg(1)
@@ -1240,7 +1259,7 @@
scale = self._get_unicode_item_scale()
if not (isinstance(length_loc, ImmedLoc) or
isinstance(length_loc, RegLoc)):
- self.assembler.mov(length_loc, bytes_loc)
+ self.assembler.mov(args[4], length_loc, bytes_loc)
length_loc = bytes_loc
self.assembler.load_effective_addr(length_loc, 0, scale, bytes_loc)
length_box = bytes_box
@@ -1347,6 +1366,7 @@
# Build the four lists
for i in range(op.numargs()):
box = op.getarg(i)
+ assert not isinstance(box, BoxVector)
src_loc = self.loc(box)
dst_loc = arglocs[i]
if box.type != FLOAT:
diff --git a/pypy/jit/backend/x86/regloc.py b/pypy/jit/backend/x86/regloc.py
--- a/pypy/jit/backend/x86/regloc.py
+++ b/pypy/jit/backend/x86/regloc.py
@@ -557,6 +557,7 @@
MOVSD = _binaryop('MOVSD')
MOVAPD = _binaryop('MOVAPD')
MOVDQA = _binaryop('MOVDQA')
+ MOVDQU = _binaryop('MOVDQU')
ADDSD = _binaryop('ADDSD')
ADDPD = _binaryop('ADDPD')
SUBSD = _binaryop('SUBSD')
diff --git a/pypy/jit/backend/x86/rx86.py b/pypy/jit/backend/x86/rx86.py
--- a/pypy/jit/backend/x86/rx86.py
+++ b/pypy/jit/backend/x86/rx86.py
@@ -726,6 +726,10 @@
regtype='XMM')
define_modrm_modes('MOVDQA_*x', ['\x66', rex_nw, '\x0F\x7F', register(2, 8)],
regtype='XMM')
+define_modrm_modes('MOVDQU_x*', ['\xF3', rex_nw, '\x0F\x6F', register(1, 8)],
+ regtype='XMM')
+define_modrm_modes('MOVDQU_*x', ['\xF3', rex_nw, '\x0F\x7F', register(2, 8)],
+ regtype='XMM')
define_modrm_modes('SQRTSD_x*', ['\xF2', rex_nw, '\x0F\x51', register(1,8)],
regtype='XMM')
diff --git a/pypy/jit/backend/x86/test/test_runner.py b/pypy/jit/backend/x86/test/test_runner.py
--- a/pypy/jit/backend/x86/test/test_runner.py
+++ b/pypy/jit/backend/x86/test/test_runner.py
@@ -518,6 +518,58 @@
assert self.cpu.get_latest_value_int(3) == 42
+ def test_vector_spill(self):
+ A = lltype.Array(lltype.Float, hints={'nolength': True,
+ 'memory_position_alignment': 16})
+ descr0 = self.cpu.arraydescrof(A)
+ looptoken = JitCellToken()
+ ops = parse("""
+ [p0, p1]
+ vec0 = getarrayitem_vector_raw(p0, 0, descr=descr0)
+ vec1 = getarrayitem_vector_raw(p1, 2, descr=descr0)
+ vec2 = getarrayitem_vector_raw(p1, 4, descr=descr0)
+ vec3 = getarrayitem_vector_raw(p1, 6, descr=descr0)
+ vec4 = getarrayitem_vector_raw(p1, 8, descr=descr0)
+ vec5 = getarrayitem_vector_raw(p1, 10, descr=descr0)
+ vec6 = getarrayitem_vector_raw(p1, 12, descr=descr0)
+ vec7 = getarrayitem_vector_raw(p1, 14, descr=descr0)
+ vec8 = getarrayitem_vector_raw(p1, 16, descr=descr0)
+ vec9 = getarrayitem_vector_raw(p1, 18, descr=descr0)
+ vec10 = getarrayitem_vector_raw(p1, 20, descr=descr0)
+ vec11 = getarrayitem_vector_raw(p1, 22, descr=descr0)
+ vec12 = getarrayitem_vector_raw(p1, 24, descr=descr0)
+ vec13 = getarrayitem_vector_raw(p1, 26, descr=descr0)
+ vec14 = getarrayitem_vector_raw(p1, 28, descr=descr0)
+ vec15 = getarrayitem_vector_raw(p1, 30, descr=descr0)
+ vec16 = float_vector_add(vec0, vec1)
+ vec17 = float_vector_add(vec16, vec2)
+ vec18 = float_vector_add(vec17, vec3)
+ vec19 = float_vector_add(vec18, vec4)
+ vec20 = float_vector_add(vec19, vec5)
+ vec21 = float_vector_add(vec20, vec6)
+ vec22 = float_vector_add(vec21, vec7)
+ vec23 = float_vector_add(vec22, vec8)
+ vec24 = float_vector_add(vec23, vec9)
+ vec25 = float_vector_add(vec24, vec10)
+ vec26 = float_vector_add(vec25, vec11)
+ vec27 = float_vector_add(vec26, vec12)
+ vec28 = float_vector_add(vec27, vec13)
+ vec29 = float_vector_add(vec28, vec14)
+ vec30 = float_vector_add(vec29, vec15)
+ setarrayitem_vector_raw(p0, 0, vec30, descr=descr0)
+ finish()
+ """, namespace=locals())
+ self.cpu.compile_loop(ops.inputargs, ops.operations, looptoken)
+ a = lltype.malloc(A, 32, flavor='raw')
+ assert rffi.cast(lltype.Signed, a) % 16 == 0
+ for i in range(32):
+ a[i] = float(i)
+ self.cpu.execute_token(looptoken, a, a)
+ assert a[0] == 16 * 15
+ assert a[1] == 16 * 16
+ lltype.free(a, flavor='raw')
+
+
class TestDebuggingAssembler(object):
def setup_method(self, meth):
self.cpu = CPU(rtyper=None, stats=FakeStats())
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit