Author: Maciej Fijalkowski <[email protected]>
Branch: backend-vector-ops
Changeset: r52010:b60d7a3bcf8f
Date: 2012-02-01 14:42 +0200
http://bitbucket.org/pypy/pypy/changeset/b60d7a3bcf8f/
Log: Good. First go at vectorized operations - support double reading,
writing and adding in the x86 backend. No spilling so far
diff --git a/pypy/jit/backend/model.py b/pypy/jit/backend/model.py
--- a/pypy/jit/backend/model.py
+++ b/pypy/jit/backend/model.py
@@ -10,6 +10,8 @@
# longlongs are supported by the JIT, but stored as doubles.
# Boxes and Consts are BoxFloats and ConstFloats.
supports_singlefloats = False
+ supports_vector_ops = False
+ # SSE and similar
done_with_this_frame_void_v = -1
done_with_this_frame_int_v = -1
diff --git a/pypy/jit/backend/test/runner_test.py b/pypy/jit/backend/test/runner_test.py
--- a/pypy/jit/backend/test/runner_test.py
+++ b/pypy/jit/backend/test/runner_test.py
@@ -3141,10 +3141,29 @@
assert fail.identifier == 42
def test_vector_ops(self):
- ops = """
- [p0]
- guard_array_aligned(p0) []
- """
+ if not self.cpu.supports_vector_ops:
+ py.test.skip("unsupported vector ops")
+
+ A = lltype.Array(lltype.Float, hints={'nolength': True,
+ 'memory_position_alignment':
16})
+ descr0 = self.cpu.arraydescrof(A)
+ looptoken = JitCellToken()
+ ops = parse("""
+ [p0, p1]
+ vec0 = getarrayitem_vector_raw(p0, 0, descr=descr0)
+ vec1 = getarrayitem_vector_raw(p1, 0, descr=descr0)
+ vec2 = float_vector_add(vec0, vec1)
+ setarrayitem_vector_raw(p0, 0, vec2, descr=descr0)
+ finish()
+ """, namespace=locals())
+ self.cpu.compile_loop(ops.inputargs, ops.operations, looptoken)
+ a = lltype.malloc(A, 10, flavor='raw')
+ a[0] = 13.0
+ a[1] = 15.0
+ self.cpu.execute_token(looptoken, a, a)
+ assert a[0] == 26
+ assert a[1] == 30
+ lltype.free(a, flavor='raw')
class OOtypeBackendTest(BaseBackendTest):
diff --git a/pypy/jit/backend/x86/assembler.py b/pypy/jit/backend/x86/assembler.py
--- a/pypy/jit/backend/x86/assembler.py
+++ b/pypy/jit/backend/x86/assembler.py
@@ -45,6 +45,7 @@
# darwin requires the stack to be 16 bytes aligned on calls. Same for gcc 4.5.0,
# better safe than sorry
CALL_ALIGN = 16 // WORD
+FLOAT_VECTOR_SIZE = 1 # multiply by 2
def align_stack_words(words):
return (words + CALL_ALIGN - 1) & ~(CALL_ALIGN-1)
@@ -1164,6 +1165,7 @@
genop_int_rshift = _binaryop("SAR")
genop_uint_rshift = _binaryop("SHR")
genop_float_add = _binaryop("ADDSD", True)
+ genop_float_vector_add = _binaryop("ADDPD", True)
genop_float_sub = _binaryop('SUBSD')
genop_float_mul = _binaryop('MULSD', True)
genop_float_truediv = _binaryop('DIVSD')
@@ -1458,6 +1460,13 @@
genop_getarrayitem_gc_pure = genop_getarrayitem_gc
genop_getarrayitem_raw = genop_getarrayitem_gc
+ def genop_getarrayitem_vector_raw(self, op, arglocs, resloc):
+ base_loc, ofs_loc, size_loc, _, sign_loc = arglocs
+ assert isinstance(size_loc, ImmedLoc)
+ scale = _get_scale(size_loc.value)
+ src_addr = addr_add(base_loc, ofs_loc, 0, scale)
+ self.mc.MOVDQA(resloc, src_addr)
+
def _get_interiorfield_addr(self, temp_loc, index_loc, itemsize_loc,
base_loc, ofs_loc):
assert isinstance(itemsize_loc, ImmedLoc)
@@ -1510,6 +1519,13 @@
dest_addr = AddressLoc(base_loc, ofs_loc, scale, baseofs.value)
self.save_into_mem(dest_addr, value_loc, size_loc)
+ def genop_discard_setarrayitem_vector_raw(self, op, arglocs):
+ base_loc, ofs_loc, value_loc, size_loc, _ = arglocs
+ assert isinstance(size_loc, ImmedLoc)
+ scale = _get_scale(size_loc.value)
+ dest_addr = AddressLoc(base_loc, ofs_loc, scale, 0)
+ self.mc.MOVDQA(dest_addr, value_loc)
+
def genop_discard_strsetitem(self, op, arglocs):
base_loc, ofs_loc, val_loc = arglocs
basesize, itemsize, ofs_length = symbolic.get_array_token(rstr.STR,
diff --git a/pypy/jit/backend/x86/regalloc.py b/pypy/jit/backend/x86/regalloc.py
--- a/pypy/jit/backend/x86/regalloc.py
+++ b/pypy/jit/backend/x86/regalloc.py
@@ -5,7 +5,7 @@
import os
from pypy.jit.metainterp.history import (Box, Const, ConstInt, ConstPtr,
ResOperation, BoxPtr, ConstFloat,
- BoxFloat, INT, REF, FLOAT,
+ BoxFloat, INT, REF, FLOAT, VECTOR,
TargetToken, JitCellToken)
from pypy.jit.backend.x86.regloc import *
from pypy.rpython.lltypesystem import lltype, rffi, rstr
@@ -87,7 +87,7 @@
class X86XMMRegisterManager(RegisterManager):
- box_types = [FLOAT]
+ box_types = [FLOAT, VECTOR]
all_regs = [xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7]
# we never need lower byte I hope
save_around_call_regs = all_regs
@@ -256,7 +256,7 @@
return pass_on_stack
def possibly_free_var(self, var):
- if var.type == FLOAT:
+ if var.type in self.xrm.box_types:
self.xrm.possibly_free_var(var)
else:
self.rm.possibly_free_var(var)
@@ -274,7 +274,7 @@
def make_sure_var_in_reg(self, var, forbidden_vars=[],
selected_reg=None, need_lower_byte=False):
- if var.type == FLOAT:
+ if var.type in self.xrm.box_types:
if isinstance(var, ConstFloat):
return FloatImmedLoc(var.getfloatstorage())
return self.xrm.make_sure_var_in_reg(var, forbidden_vars,
@@ -285,7 +285,7 @@
def force_allocate_reg(self, var, forbidden_vars=[], selected_reg=None,
need_lower_byte=False):
- if var.type == FLOAT:
+ if var.type in self.xrm.box_types:
return self.xrm.force_allocate_reg(var, forbidden_vars,
selected_reg, need_lower_byte)
else:
@@ -293,7 +293,7 @@
selected_reg, need_lower_byte)
def force_spill_var(self, var):
- if var.type == FLOAT:
+ if var.type in self.xrm.box_types:
return self.xrm.force_spill_var(var)
else:
return self.rm.force_spill_var(var)
@@ -530,7 +530,7 @@
def loc(self, v):
if v is None: # xxx kludgy
return None
- if v.type == FLOAT:
+ if v.type in self.xrm.box_types:
return self.xrm.loc(v)
return self.rm.loc(v)
@@ -701,6 +701,7 @@
self.xrm.possibly_free_vars_for_op(op)
consider_float_add = _consider_float_op
+ consider_float_vector_add = _consider_float_op
consider_float_sub = _consider_float_op
consider_float_mul = _consider_float_op
consider_float_truediv = _consider_float_op
@@ -1080,6 +1081,7 @@
imm(itemsize), imm(ofs)])
consider_setarrayitem_raw = consider_setarrayitem_gc
+ consider_setarrayitem_vector_raw = consider_setarrayitem_gc
def consider_getfield_gc(self, op):
ofs_loc, size_loc, sign = self._unpack_fielddescr(op.getdescr())
@@ -1112,6 +1114,7 @@
sign_loc], result_loc)
consider_getarrayitem_raw = consider_getarrayitem_gc
+ consider_getarrayitem_vector_raw = consider_getarrayitem_gc
consider_getarrayitem_gc_pure = consider_getarrayitem_gc
def consider_getinteriorfield_gc(self, op):
diff --git a/pypy/jit/backend/x86/regloc.py b/pypy/jit/backend/x86/regloc.py
--- a/pypy/jit/backend/x86/regloc.py
+++ b/pypy/jit/backend/x86/regloc.py
@@ -556,6 +556,7 @@
MOVSD = _binaryop('MOVSD')
MOVAPD = _binaryop('MOVAPD')
+ MOVDQA = _binaryop('MOVDQA')
ADDSD = _binaryop('ADDSD')
ADDPD = _binaryop('ADDPD')
SUBSD = _binaryop('SUBSD')
diff --git a/pypy/jit/backend/x86/runner.py b/pypy/jit/backend/x86/runner.py
--- a/pypy/jit/backend/x86/runner.py
+++ b/pypy/jit/backend/x86/runner.py
@@ -21,6 +21,7 @@
debug = True
supports_floats = True
supports_singlefloats = True
+ supports_vector_ops = True
dont_keepalive_stuff = False # for tests
with_threads = False
diff --git a/pypy/jit/backend/x86/rx86.py b/pypy/jit/backend/x86/rx86.py
--- a/pypy/jit/backend/x86/rx86.py
+++ b/pypy/jit/backend/x86/rx86.py
@@ -714,12 +714,18 @@
define_modrm_modes('MOVSX16_r*', [rex_w, '\x0F\xBF', register(1, 8)])
define_modrm_modes('MOVSX32_r*', [rex_w, '\x63', register(1, 8)])
-define_modrm_modes('MOVSD_x*', ['\xF2', rex_nw, '\x0F\x10', register(1,8)],
regtype='XMM')
-define_modrm_modes('MOVSD_*x', ['\xF2', rex_nw, '\x0F\x11', register(2,8)],
regtype='XMM')
+define_modrm_modes('MOVSD_x*', ['\xF2', rex_nw, '\x0F\x10', register(1,8)],
+ regtype='XMM')
+define_modrm_modes('MOVSD_*x', ['\xF2', rex_nw, '\x0F\x11', register(2,8)],
+ regtype='XMM')
define_modrm_modes('MOVAPD_x*', ['\x66', rex_nw, '\x0F\x28', register(1,8)],
regtype='XMM')
define_modrm_modes('MOVAPD_*x', ['\x66', rex_nw, '\x0F\x29', register(2,8)],
regtype='XMM')
+define_modrm_modes('MOVDQA_x*', ['\x66', rex_nw, '\x0F\x6F', register(1, 8)],
+ regtype='XMM')
+define_modrm_modes('MOVDQA_*x', ['\x66', rex_nw, '\x0F\x7F', register(2, 8)],
+ regtype='XMM')
define_modrm_modes('SQRTSD_x*', ['\xF2', rex_nw, '\x0F\x51', register(1,8)],
regtype='XMM')
diff --git a/pypy/jit/metainterp/executor.py b/pypy/jit/metainterp/executor.py
--- a/pypy/jit/metainterp/executor.py
+++ b/pypy/jit/metainterp/executor.py
@@ -273,6 +273,9 @@
# ____________________________________________________________
+IGNORED = ['FLOAT_VECTOR_ADD', 'GETARRAYITEM_VECTOR_RAW',
+ 'SETARRAYITEM_VECTOR_RAW']
+
def _make_execute_list():
if 0: # enable this to trace calls to do_xxx
def wrap(fn):
@@ -349,7 +352,8 @@
rop.LABEL,
): # list of opcodes never executed by pyjitpl
continue
- raise AssertionError("missing %r" % (key,))
+ if not key in IGNORED:
+ raise AssertionError("missing %r" % (key,))
return execute_by_num_args
def make_execute_function_with_boxes(name, func):
diff --git a/pypy/jit/metainterp/history.py b/pypy/jit/metainterp/history.py
--- a/pypy/jit/metainterp/history.py
+++ b/pypy/jit/metainterp/history.py
@@ -482,17 +482,14 @@
def repr_rpython(self):
return repr_rpython(self, 'bi')
-class BoxFloatVector(Box):
+class BoxVector(Box):
type = VECTOR
- def __init__(self, floats):
- self.floats = floats
+ def __init__(self):
+ pass
-class BoxIntVector(Box):
- type = VECTOR
-
- def __init__(self, ints):
- self.ints = ints
+ def _getrepr_(self):
+ return ''
class BoxFloat(Box):
type = FLOAT
diff --git a/pypy/jit/metainterp/resoperation.py b/pypy/jit/metainterp/resoperation.py
--- a/pypy/jit/metainterp/resoperation.py
+++ b/pypy/jit/metainterp/resoperation.py
@@ -489,7 +489,7 @@
'SETARRAYITEM_GC/3d',
'SETARRAYITEM_RAW/3d',
- 'SETARRAYITEM_VECTOR_RAW/2d',
+ 'SETARRAYITEM_VECTOR_RAW/3d',
'SETINTERIORFIELD_GC/3d',
'SETINTERIORFIELD_RAW/3d',
'SETFIELD_GC/2d',
diff --git a/pypy/jit/tool/oparser.py b/pypy/jit/tool/oparser.py
--- a/pypy/jit/tool/oparser.py
+++ b/pypy/jit/tool/oparser.py
@@ -114,6 +114,9 @@
elif elem.startswith('f'):
box = self.model.BoxFloat()
_box_counter_more_than(self.model, elem[1:])
+ elif elem.startswith('vec'):
+ box = self.model.BoxVector()
+ _box_counter_more_than(self.model, elem[3:])
elif elem.startswith('p'):
# pointer
ts = getattr(self.cpu, 'ts', self.model.llhelper)
diff --git a/pypy/jit/tool/oparser_model.py b/pypy/jit/tool/oparser_model.py
--- a/pypy/jit/tool/oparser_model.py
+++ b/pypy/jit/tool/oparser_model.py
@@ -4,7 +4,7 @@
def get_real_model():
class LoopModel(object):
from pypy.jit.metainterp.history import TreeLoop, JitCellToken
- from pypy.jit.metainterp.history import Box, BoxInt, BoxFloat
+ from pypy.jit.metainterp.history import Box, BoxInt, BoxFloat,
BoxVector
from pypy.jit.metainterp.history import ConstInt, ConstObj, ConstPtr,
ConstFloat
from pypy.jit.metainterp.history import BasicFailDescr, TargetToken
from pypy.jit.metainterp.typesystem import llhelper
@@ -76,6 +76,9 @@
class BoxRef(Box):
type = 'p'
+ class BoxVector(Box):
+ type = 'e'
+
class Const(object):
def __init__(self, value=None):
self.value = value
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit