Author: Richard Plangger <[email protected]>
Branch: ppc-vsx-support
Changeset: r85398:3a5a728d551e
Date: 2016-06-27 15:59 +0200
http://bitbucket.org/pypy/pypy/changeset/3a5a728d551e/
Log: refactoring of vec_load/vec_store: the same simplification (as
e.g. raw_load -> gc_load) is now done for them, and the arch-dependent
code has been moved to the backend
diff --git a/rpython/jit/backend/llsupport/llmodel.py
b/rpython/jit/backend/llsupport/llmodel.py
--- a/rpython/jit/backend/llsupport/llmodel.py
+++ b/rpython/jit/backend/llsupport/llmodel.py
@@ -35,6 +35,7 @@
# can an ISA instruction handle a factor to the offset?
load_supported_factors = (1,)
+ vector_ext = None
vector_extension = False
vector_register_size = 0 # in bytes
vector_horizontal_operations = False
diff --git a/rpython/jit/backend/llsupport/rewrite.py
b/rpython/jit/backend/llsupport/rewrite.py
--- a/rpython/jit/backend/llsupport/rewrite.py
+++ b/rpython/jit/backend/llsupport/rewrite.py
@@ -156,32 +156,12 @@
index_box = op.getarg(1)
self.emit_gc_load_or_indexed(op, ptr_box, index_box, itemsize,
itemsize, ofs, sign)
- def handle_rawload(self, op):
- itemsize, ofs, sign = unpack_arraydescr(op.getdescr())
- ptr_box = op.getarg(0)
- index_box = op.getarg(1)
- self.emit_gc_load_or_indexed(op, ptr_box, index_box, itemsize, 1, ofs,
sign)
-
def _emit_mul_if_factor_offset_not_supported(self, index_box,
factor, offset):
- # Returns (factor, offset, index_box) where index_box is either
- # a non-constant BoxInt or None.
- if isinstance(index_box, ConstInt):
- return 1, index_box.value * factor + offset, None
- else:
- if factor != 1 and factor not in self.cpu.load_supported_factors:
- # the factor is supported by the cpu
- # x & (x - 1) == 0 is a quick test for power of 2
- assert factor > 0
- if (factor & (factor - 1)) == 0:
- index_box = ResOperation(rop.INT_LSHIFT,
- [index_box, ConstInt(highest_bit(factor))])
- else:
- index_box = ResOperation(rop.INT_MUL,
- [index_box, ConstInt(factor)])
- self.emit_op(index_box)
- factor = 1
- return factor, offset, index_box
+ factor, offset, index_box = cpu_simplify_scale(self.cpu, indexbox,
factor, offset)
+ if index_box:
+ self.emit_op(index_box)
+ return factor, offset, index_box
def emit_gc_load_or_indexed(self, op, ptr_box, index_box, itemsize,
factor, offset, sign, type='i'):
@@ -985,3 +965,23 @@
self._newops.append(load_op)
self.gcrefs_recently_loaded[index] = load_op
return load_op
+
+def cpu_simplify_scale(cpu, index_box, factor, offset):
+ # Returns (factor, offset, index_box, [ops]) where index_box is either
+ # a non-constant BoxInt or None.
+ if isinstance(index_box, ConstInt):
+ return 1, index_box.value * factor + offset, None
+ else:
+ if factor != 1 and factor not in cpu.load_supported_factors:
+ # the factor is supported by the cpu
+ # x & (x - 1) == 0 is a quick test for power of 2
+ assert factor > 0
+ if (factor & (factor - 1)) == 0:
+ index_box = ResOperation(rop.INT_LSHIFT,
+ [index_box, ConstInt(highest_bit(factor))])
+ else:
+ index_box = ResOperation(rop.INT_MUL,
+ [index_box, ConstInt(factor)])
+ factor = 1
+ return factor, offset, index_box
+
diff --git a/rpython/jit/backend/llsupport/vector_ext.py
b/rpython/jit/backend/llsupport/vector_ext.py
new file mode 100644
--- /dev/null
+++ b/rpython/jit/backend/llsupport/vector_ext.py
@@ -0,0 +1,235 @@
+from rpython.jit.backend.llsupport.descr import (unpack_arraydescr,
+ unpack_fielddescr, unpack_interiorfielddescr)
+from rpython.rlib.objectmodel import specialize, always_inline
+from rpython.jit.metainterp.history import (VECTOR, FLOAT, INT)
+from rpython.jit.metainterp.resoperation import rop
+
+class TypeRestrict(object):
+ ANY_TYPE = '\x00'
+ ANY_SIZE = -1
+ ANY_SIGN = -1
+ ANY_COUNT = -1
+ SIGNED = 1
+ UNSIGNED = 0
+
+ def __init__(self,
+ type=ANY_TYPE,
+ bytesize=ANY_SIZE,
+ count=ANY_SIGN,
+ sign=ANY_COUNT):
+ self.type = type
+ self.bytesize = bytesize
+ self.sign = sign
+ self.count = count
+
+ @always_inline
+ def any_size(self):
+ return self.bytesize == TypeRestrict.ANY_SIZE
+
+ @always_inline
+ def any_count(self):
+ return self.count == TypeRestrict.ANY_COUNT
+
+ def check(self, value):
+ vecinfo = forwarded_vecinfo(value)
+ assert vecinfo.datatype != '\x00'
+ if self.type != TypeRestrict.ANY_TYPE:
+ if self.type != vecinfo.datatype:
+ msg = "type mismatch %s != %s" % \
+ (self.type, vecinfo.datatype)
+ failnbail_transformation(msg)
+ assert vecinfo.bytesize > 0
+ if not self.any_size():
+ if self.bytesize != vecinfo.bytesize:
+ msg = "bytesize mismatch %s != %s" % \
+ (self.bytesize, vecinfo.bytesize)
+ failnbail_transformation(msg)
+ assert vecinfo.count > 0
+ if self.count != TypeRestrict.ANY_COUNT:
+ if vecinfo.count < self.count:
+ msg = "count mismatch %s < %s" % \
+ (self.count, vecinfo.count)
+ failnbail_transformation(msg)
+ if self.sign != TypeRestrict.ANY_SIGN:
+ if bool(self.sign) == vecinfo.sign:
+ msg = "sign mismatch %s < %s" % \
+ (self.sign, vecinfo.sign)
+ failnbail_transformation(msg)
+
+ def max_input_count(self, count):
+ """ How many """
+ if self.count != TypeRestrict.ANY_COUNT:
+ return self.count
+ return count
+
+class OpRestrict(object):
+ def __init__(self, argument_restris):
+ self.argument_restrictions = argument_restris
+
+ def check_operation(self, state, pack, op):
+ pass
+
+ def crop_vector(self, op, newsize, size):
+ return newsize, size
+
+ def must_crop_vector(self, op, index):
+ restrict = self.argument_restrictions[index]
+ vecinfo = forwarded_vecinfo(op.getarg(index))
+ size = vecinfo.bytesize
+ newsize = self.crop_to_size(op, index)
+ return not restrict.any_size() and newsize != size
+
+ @always_inline
+ def crop_to_size(self, op, index):
+ restrict = self.argument_restrictions[index]
+ return restrict.bytesize
+
+ def opcount_filling_vector_register(self, op, vec_reg_size):
+ """ How many operations of that kind can one execute
+ with a machine instruction of register size X?
+ """
+ if op.is_typecast():
+ if op.casts_down():
+ size = op.cast_input_bytesize(vec_reg_size)
+ return size // op.cast_from_bytesize()
+ else:
+ return vec_reg_size // op.cast_to_bytesize()
+ vecinfo = forwarded_vecinfo(op)
+ return vec_reg_size // vecinfo.bytesize
+
+class GuardRestrict(OpRestrict):
+ def opcount_filling_vector_register(self, op, vec_reg_size):
+ arg = op.getarg(0)
+ vecinfo = forwarded_vecinfo(arg)
+ return vec_reg_size // vecinfo.bytesize
+
+class LoadRestrict(OpRestrict):
+ def check_operation(self, state, pack, op):
+ opnum = op.getopnum()
+ if rop.is_getarrayitem(opnum) or \
+ opnum in (rop.GETARRAYITEM_RAW_I, rop.GETARRAYITEM_RAW_F):
+ itemsize, ofs, sign = unpack_arraydescr(op.getdescr())
+ index_box = op.getarg(1)
+ _, _, changed = cpu_simplify_scale(state.cpu, index_box, itemsize,
ofs)
+ if changed is not index_box:
+ state.oplist.append(changed)
+ op.setarg(1, changed)
+
+ def opcount_filling_vector_register(self, op, vec_reg_size):
+ assert rop.is_primitive_load(op.opnum)
+ descr = op.getdescr()
+ return vec_reg_size // descr.get_item_size_in_bytes()
+
+class StoreRestrict(OpRestrict):
+ def __init__(self, argument_restris):
+ self.argument_restrictions = argument_restris
+
+ def check_operation(self, state, pack, op):
+ opnum = op.getopnum()
+ if opnum in (rop.SETARRAYITEM_GC, rop.SETARRAYITEM_RAW):
+ itemsize, basesize, _ = unpack_arraydescr(op.getdescr())
+ index_box = op.getarg(1)
+ _, _, changed = cpu_simplify_scale(index_box, itemsize, basesize)
+ if changed is not index_box:
+ state.oplist.append(changed)
+ op.setarg(1, changed)
+
+ def must_crop_vector(self, op, index):
+ vecinfo = forwarded_vecinfo(op.getarg(index))
+ bytesize = vecinfo.bytesize
+ return self.crop_to_size(op, index) != bytesize
+
+ @always_inline
+ def crop_to_size(self, op, index):
+ # there is only one parameter that needs to be transformed!
+ descr = op.getdescr()
+ return descr.get_item_size_in_bytes()
+
+ def opcount_filling_vector_register(self, op, vec_reg_size):
+ assert rop.is_primitive_store(op.opnum)
+ descr = op.getdescr()
+ return vec_reg_size // descr.get_item_size_in_bytes()
+
+class OpMatchSizeTypeFirst(OpRestrict):
+ def check_operation(self, state, pack, op):
+ i = 0
+ infos = [forwarded_vecinfo(o) for o in op.getarglist()]
+ arg0 = op.getarg(i)
+ while arg0.is_constant() and i < op.numargs():
+ i += 1
+ arg0 = op.getarg(i)
+ vecinfo = forwarded_vecinfo(arg0)
+ bytesize = vecinfo.bytesize
+ datatype = vecinfo.datatype
+
+ for arg in op.getarglist():
+ if arg.is_constant():
+ continue
+ curvecinfo = forwarded_vecinfo(arg)
+ if curvecinfo.bytesize != bytesize:
+ raise NotAVectorizeableLoop()
+ if curvecinfo.datatype != datatype:
+ raise NotAVectorizeableLoop()
+
+TR_ANY = TypeRestrict()
+TR_ANY_FLOAT = TypeRestrict(FLOAT)
+TR_ANY_INTEGER = TypeRestrict(INT)
+TR_FLOAT_2 = TypeRestrict(FLOAT, 4, 2)
+TR_DOUBLE_2 = TypeRestrict(FLOAT, 8, 2)
+TR_INT32_2 = TypeRestrict(INT, 4, 2)
+
+OR_MSTF_I = OpMatchSizeTypeFirst([TR_ANY_INTEGER, TR_ANY_INTEGER])
+OR_MSTF_F = OpMatchSizeTypeFirst([TR_ANY_FLOAT, TR_ANY_FLOAT])
+STORE_RESTRICT = StoreRestrict([None, None, TR_ANY])
+LOAD_RESTRICT = LoadRestrict([])
+GUARD_RESTRICT = GuardRestrict([TR_ANY_INTEGER])
+
+
+class VectorExt(object):
+
+ # note that the following definition is x86 arch specific
+ TR_MAPPING = {
+ rop.VEC_INT_ADD: OR_MSTF_I,
+ rop.VEC_INT_SUB: OR_MSTF_I,
+ rop.VEC_INT_MUL: OR_MSTF_I,
+ rop.VEC_INT_AND: OR_MSTF_I,
+ rop.VEC_INT_OR: OR_MSTF_I,
+ rop.VEC_INT_XOR: OR_MSTF_I,
+ rop.VEC_INT_EQ: OR_MSTF_I,
+ rop.VEC_INT_NE: OR_MSTF_I,
+
+ rop.VEC_FLOAT_ADD: OR_MSTF_F,
+ rop.VEC_FLOAT_SUB: OR_MSTF_F,
+ rop.VEC_FLOAT_MUL: OR_MSTF_F,
+ rop.VEC_FLOAT_TRUEDIV: OR_MSTF_F,
+ rop.VEC_FLOAT_ABS: OpRestrict([TR_ANY_FLOAT]),
+ rop.VEC_FLOAT_NEG: OpRestrict([TR_ANY_FLOAT]),
+
+ rop.VEC_STORE: STORE_RESTRICT,
+
+ rop.VEC_LOAD_I: LOAD_RESTRICT,
+ rop.VEC_LOAD_F: LOAD_RESTRICT,
+
+ rop.VEC_GUARD_TRUE: GUARD_RESTRICT,
+ rop.VEC_GUARD_FALSE: GUARD_RESTRICT,
+
+ ## irregular
+ rop.VEC_INT_SIGNEXT: OpRestrict([TR_ANY_INTEGER]),
+
+ rop.VEC_CAST_FLOAT_TO_SINGLEFLOAT: OpRestrict([TR_DOUBLE_2]),
+ # weird but the trace will store single floats in int boxes
+ rop.VEC_CAST_SINGLEFLOAT_TO_FLOAT: OpRestrict([TR_INT32_2]),
+ rop.VEC_CAST_FLOAT_TO_INT: OpRestrict([TR_DOUBLE_2]),
+ rop.VEC_CAST_INT_TO_FLOAT: OpRestrict([TR_INT32_2]),
+
+ rop.VEC_FLOAT_EQ: OpRestrict([TR_ANY_FLOAT,TR_ANY_FLOAT]),
+ rop.VEC_FLOAT_NE: OpRestrict([TR_ANY_FLOAT,TR_ANY_FLOAT]),
+ rop.VEC_INT_IS_TRUE:
OpRestrict([TR_ANY_INTEGER,TR_ANY_INTEGER]),
+ }
+
+ def get_operation_restriction(self, op):
+ res = self.TR_MAPPING.get(op.vector, None)
+ if not res:
+ failnbail_transformation("could not get OpRestrict for " + str(op))
+ return res
+
diff --git a/rpython/jit/backend/ppc/codebuilder.py
b/rpython/jit/backend/ppc/codebuilder.py
--- a/rpython/jit/backend/ppc/codebuilder.py
+++ b/rpython/jit/backend/ppc/codebuilder.py
@@ -62,8 +62,9 @@
XFX = Form("CRM", "rS", "XO1")
XLL = Form("LL", "XO1")
XX1 = Form("fvrT", "rA", "rB", "XO1")
-XX2 = Form("fvrT", "fvrB", "XO5")
+XX2 = Form("fvrT", "fvrB", "XO6")
XX3 = Form("fvrT", "fvrA", "fvrB", "XO9")
+XX3_2 = Form("fvrT", "fvrA", "fvrB", "OE", "XO11")
XV = Form("ivrT", "rA", "rB", "XO1")
VX = Form("ivrT", "ivrA", "ivrB", "XO8")
VXI = Form("ivrT", "SIM", "XO8")
@@ -611,14 +612,22 @@
# div
xvdivdp = XX3(60, XO9=102)
xvdivsp = XX3(60, XO9=88)
+ # cmp
+ xvcmpeqdp = XX3_2(60, XO11=99, OE=0)
+ xvcmpeqdpx = XX3_2(60, XO11=99, OE=1)
+ xvcmpeqsp = XX3_2(60, XO11=67, OE=0)
+ xvcmpeqspx = XX3_2(60, XO11=67, OE=1)
+
+ # logical and and complement
+ xxlandc = XX3(60, XO9=138)
# neg
- xvnegdp = XX2(60, XO5=505)
- xvabssp = XX2(60, XO5=441)
+ xvnegdp = XX2(60, XO6=505)
+ xvnegsp = XX2(60, XO6=441)
# abs
- xvabsdp = XX2(60, XO5=473)
- xvabssp = XX2(60, XO5=409)
+ xvabsdp = XX2(60, XO6=473)
+ xvabssp = XX2(60, XO6=409)
# INTEGER
# -------
diff --git a/rpython/jit/backend/ppc/ppc_field.py
b/rpython/jit/backend/ppc/ppc_field.py
--- a/rpython/jit/backend/ppc/ppc_field.py
+++ b/rpython/jit/backend/ppc/ppc_field.py
@@ -64,6 +64,7 @@
"XO8": (21, 31),
"XO9": (21, 28),
"XO10": (26, 31),
+ "XO11": (22, 28),
"LL": ( 9, 10),
"SIM": (11, 15),
}
diff --git a/rpython/jit/backend/ppc/runner.py
b/rpython/jit/backend/ppc/runner.py
--- a/rpython/jit/backend/ppc/runner.py
+++ b/rpython/jit/backend/ppc/runner.py
@@ -3,6 +3,7 @@
from rpython.rlib import rgc
from rpython.rlib.jit_hooks import LOOP_RUN_CONTAINER
from rpython.jit.backend.llsupport.llmodel import AbstractLLCPU
+from rpython.jit.backend.ppc.vector_ext import AltiVectorExt
from rpython.jit.backend.ppc.ppc_assembler import AssemblerPPC
from rpython.jit.backend.ppc.arch import WORD
from rpython.jit.backend.ppc.codebuilder import PPCBuilder
@@ -11,6 +12,7 @@
class PPC_CPU(AbstractLLCPU):
+ vector_ext = None
vector_extension = False # may be set to true in setup
vector_register_size = 16
vector_horizontal_operations = False
@@ -47,6 +49,7 @@
def setup_once(self):
self.assembler.setup_once()
if detect_vsx():
+ self.vector_ext = AltiVectorExt()
self.vector_extension = True
# ??? self.vector_horizontal_operations = True
diff --git a/rpython/jit/backend/ppc/vector_ext.py
b/rpython/jit/backend/ppc/vector_ext.py
--- a/rpython/jit/backend/ppc/vector_ext.py
+++ b/rpython/jit/backend/ppc/vector_ext.py
@@ -12,6 +12,7 @@
from rpython.rtyper.lltypesystem import lltype
from rpython.jit.backend.ppc.locations import imm
from rpython.jit.backend.ppc.arch import IS_BIG_ENDIAN
+from rpython.jit.backend.llsupport.vector_ext import VectorExt
def not_implemented(msg):
msg = '[ppc/vector_ext] %s\n' % msg
@@ -19,6 +20,9 @@
llop.debug_print(lltype.Void, msg)
raise NotImplementedError(msg)
+class AltiVectorExt(VectorExt):
+ pass
+
class VectorAssembler(object):
_mixin_ = True
@@ -348,22 +352,20 @@
# # entries before) become ones
# self.mc.PCMPEQ(loc, temp, sizeloc.value)
- #def genop_vec_float_eq(self, op, arglocs, resloc):
- # _, rhsloc, sizeloc = arglocs
- # size = sizeloc.value
- # if size == 4:
- # self.mc.CMPPS_xxi(resloc.value, rhsloc.value, 0) # 0 means equal
- # else:
- # self.mc.CMPPD_xxi(resloc.value, rhsloc.value, 0)
+ def emit_vec_float_eq(self, op, arglocs, resloc):
+ resloc, loc1, loc2, sizeloc = arglocs
+ size = sizeloc.value
+ if size == 4:
+ self.mc.xvcmpeqspx(resloc.value, loc1.value, loc2.value)
+ elif size == 8:
+ self.mc.xvcmpeqdpx(resloc.value, loc1.value, loc2.value)
+ else:
+ notimplemented("[ppc/assembler] float == for size %d" % size)
- #def genop_vec_float_ne(self, op, arglocs, resloc):
- # _, rhsloc, sizeloc = arglocs
- # size = sizeloc.value
- # # b(100) == 1 << 2 means not equal
- # if size == 4:
- # self.mc.CMPPS_xxi(resloc.value, rhsloc.value, 1 << 2)
- # else:
- # self.mc.CMPPD_xxi(resloc.value, rhsloc.value, 1 << 2)
+ def emit_vec_float_ne(self, op, arglocs, resloc):
+ self.emit_vec_float_eq(op, arglocs, resloc)
+ resloc, loc1, loc2, sizeloc = arglocs
+ self.mc.xxlandc(resloc.value, resloc.value, resloc.value)
#def genop_vec_int_eq(self, op, arglocs, resloc):
# _, rhsloc, sizeloc = arglocs
@@ -629,8 +631,14 @@
prepare_vec_int_and = prepare_vec_arith
prepare_vec_int_or = prepare_vec_arith
prepare_vec_int_xor = prepare_vec_arith
+
+ prepare_vec_float_eq = prepare_vec_arith
+ prepare_vec_float_ne = prepare_vec_float_eq
+ prepare_vec_int_eq = prepare_vec_float_eq
+ prepare_vec_int_ne = prepare_vec_float_eq
del prepare_vec_arith
+
def _prepare_vec_store(self, op):
descr = op.getdescr()
assert isinstance(descr, ArrayDescr)
@@ -661,8 +669,6 @@
resloc = self.force_allocate_vector_reg(op)
return [resloc, loc0]
-
-
def prepare_vec_arith_unary(self, op):
a0 = op.getarg(0)
loc0 = self.ensure_vector_reg(a0)
@@ -674,19 +680,6 @@
prepare_vec_float_abs = prepare_vec_arith_unary
del prepare_vec_arith_unary
- #def prepare_vec_float_eq(self, op):
- # assert isinstance(op, VectorOp)
- # lhs = op.getarg(0)
- # assert isinstance(lhs, VectorOp)
- # args = op.getarglist()
- # rhsloc = self.make_sure_var_in_reg(op.getarg(1), args)
- # lhsloc = self.xrm.force_result_in_reg(op, op.getarg(0), args)
- # self.perform(op, [lhsloc, rhsloc, imm(lhs.bytesize)], lhsloc)
-
- #prepare_vec_float_ne = prepare_vec_float_eq
- #prepare_vec_int_eq = prepare_vec_float_eq
- #prepare_vec_int_ne = prepare_vec_float_eq
-
#def prepare_vec_pack_i(self, op):
# # new_res = vec_pack_i(res, src, index, count)
# assert isinstance(op, VectorOp)
diff --git a/rpython/jit/metainterp/executor.py
b/rpython/jit/metainterp/executor.py
--- a/rpython/jit/metainterp/executor.py
+++ b/rpython/jit/metainterp/executor.py
@@ -393,19 +393,13 @@
rop.SAVE_EXC_CLASS,
rop.SAVE_EXCEPTION,
rop.RESTORE_EXCEPTION,
- rop.VEC_RAW_LOAD_I,
- rop.VEC_RAW_LOAD_F,
- rop.VEC_RAW_STORE,
- rop.VEC_GETARRAYITEM_RAW_I,
- rop.VEC_GETARRAYITEM_RAW_F,
- rop.VEC_SETARRAYITEM_RAW,
- rop.VEC_GETARRAYITEM_GC_I,
- rop.VEC_GETARRAYITEM_GC_F,
- rop.VEC_SETARRAYITEM_GC,
+ rop.VEC_LOAD_I,
+ rop.VEC_LOAD_F,
rop.GC_LOAD_I,
rop.GC_LOAD_R,
rop.GC_LOAD_F,
rop.GC_LOAD_INDEXED_R,
+ rop.VEC_STORE,
rop.GC_STORE,
rop.GC_STORE_INDEXED,
rop.LOAD_FROM_GC_TABLE,
diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py
b/rpython/jit/metainterp/optimizeopt/schedule.py
--- a/rpython/jit/metainterp/optimizeopt/schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/schedule.py
@@ -1,3 +1,4 @@
+from rpython.jit.backend.llsupport.rewrite import cpu_simplify_scale
from rpython.jit.metainterp.history import (VECTOR, FLOAT, INT,
ConstInt, ConstFloat, TargetToken)
from rpython.jit.metainterp.resoperation import (rop, ResOperation,
@@ -26,7 +27,8 @@
return fwd
class SchedulerState(object):
- def __init__(self, graph):
+ def __init__(self, cpu, graph):
+ self.cpu = cpu
self.renamer = Renamer()
self.graph = graph
self.oplist = []
@@ -211,230 +213,16 @@
import pdb; pdb.set_trace()
raise NotImplementedError(msg)
-class TypeRestrict(object):
- ANY_TYPE = '\x00'
- ANY_SIZE = -1
- ANY_SIGN = -1
- ANY_COUNT = -1
- SIGNED = 1
- UNSIGNED = 0
-
- def __init__(self,
- type=ANY_TYPE,
- bytesize=ANY_SIZE,
- count=ANY_SIGN,
- sign=ANY_COUNT):
- self.type = type
- self.bytesize = bytesize
- self.sign = sign
- self.count = count
-
- @always_inline
- def any_size(self):
- return self.bytesize == TypeRestrict.ANY_SIZE
-
- @always_inline
- def any_count(self):
- return self.count == TypeRestrict.ANY_COUNT
-
- def check(self, value):
- vecinfo = forwarded_vecinfo(value)
- assert vecinfo.datatype != '\x00'
- if self.type != TypeRestrict.ANY_TYPE:
- if self.type != vecinfo.datatype:
- msg = "type mismatch %s != %s" % \
- (self.type, vecinfo.datatype)
- failnbail_transformation(msg)
- assert vecinfo.bytesize > 0
- if not self.any_size():
- if self.bytesize != vecinfo.bytesize:
- msg = "bytesize mismatch %s != %s" % \
- (self.bytesize, vecinfo.bytesize)
- failnbail_transformation(msg)
- assert vecinfo.count > 0
- if self.count != TypeRestrict.ANY_COUNT:
- if vecinfo.count < self.count:
- msg = "count mismatch %s < %s" % \
- (self.count, vecinfo.count)
- failnbail_transformation(msg)
- if self.sign != TypeRestrict.ANY_SIGN:
- if bool(self.sign) == vecinfo.sign:
- msg = "sign mismatch %s < %s" % \
- (self.sign, vecinfo.sign)
- failnbail_transformation(msg)
-
- def max_input_count(self, count):
- """ How many """
- if self.count != TypeRestrict.ANY_COUNT:
- return self.count
- return count
-
-class OpRestrict(object):
- def __init__(self, argument_restris):
- self.argument_restrictions = argument_restris
-
- def check_operation(self, state, pack, op):
- pass
-
- def crop_vector(self, op, newsize, size):
- return newsize, size
-
- def must_crop_vector(self, op, index):
- restrict = self.argument_restrictions[index]
- vecinfo = forwarded_vecinfo(op.getarg(index))
- size = vecinfo.bytesize
- newsize = self.crop_to_size(op, index)
- return not restrict.any_size() and newsize != size
-
- @always_inline
- def crop_to_size(self, op, index):
- restrict = self.argument_restrictions[index]
- return restrict.bytesize
-
- def opcount_filling_vector_register(self, op, vec_reg_size):
- """ How many operations of that kind can one execute
- with a machine instruction of register size X?
- """
- if op.is_typecast():
- if op.casts_down():
- size = op.cast_input_bytesize(vec_reg_size)
- return size // op.cast_from_bytesize()
- else:
- return vec_reg_size // op.cast_to_bytesize()
- vecinfo = forwarded_vecinfo(op)
- return vec_reg_size // vecinfo.bytesize
-
-class GuardRestrict(OpRestrict):
- def opcount_filling_vector_register(self, op, vec_reg_size):
- arg = op.getarg(0)
- vecinfo = forwarded_vecinfo(arg)
- return vec_reg_size // vecinfo.bytesize
-
-class LoadRestrict(OpRestrict):
- def opcount_filling_vector_register(self, op, vec_reg_size):
- assert rop.is_primitive_load(op.opnum)
- descr = op.getdescr()
- return vec_reg_size // descr.get_item_size_in_bytes()
-
-class StoreRestrict(OpRestrict):
- def __init__(self, argument_restris):
- self.argument_restrictions = argument_restris
-
- def must_crop_vector(self, op, index):
- vecinfo = forwarded_vecinfo(op.getarg(index))
- bytesize = vecinfo.bytesize
- return self.crop_to_size(op, index) != bytesize
-
- @always_inline
- def crop_to_size(self, op, index):
- # there is only one parameter that needs to be transformed!
- descr = op.getdescr()
- return descr.get_item_size_in_bytes()
-
- def opcount_filling_vector_register(self, op, vec_reg_size):
- assert rop.is_primitive_store(op.opnum)
- descr = op.getdescr()
- return vec_reg_size // descr.get_item_size_in_bytes()
-
-class OpMatchSizeTypeFirst(OpRestrict):
- def check_operation(self, state, pack, op):
- i = 0
- infos = [forwarded_vecinfo(o) for o in op.getarglist()]
- arg0 = op.getarg(i)
- while arg0.is_constant() and i < op.numargs():
- i += 1
- arg0 = op.getarg(i)
- vecinfo = forwarded_vecinfo(arg0)
- bytesize = vecinfo.bytesize
- datatype = vecinfo.datatype
-
- for arg in op.getarglist():
- if arg.is_constant():
- continue
- curvecinfo = forwarded_vecinfo(arg)
- if curvecinfo.bytesize != bytesize:
- raise NotAVectorizeableLoop()
- if curvecinfo.datatype != datatype:
- raise NotAVectorizeableLoop()
-
-class trans(object):
-
- TR_ANY = TypeRestrict()
- TR_ANY_FLOAT = TypeRestrict(FLOAT)
- TR_ANY_INTEGER = TypeRestrict(INT)
- TR_FLOAT_2 = TypeRestrict(FLOAT, 4, 2)
- TR_DOUBLE_2 = TypeRestrict(FLOAT, 8, 2)
- TR_INT32_2 = TypeRestrict(INT, 4, 2)
-
- OR_MSTF_I = OpMatchSizeTypeFirst([TR_ANY_INTEGER, TR_ANY_INTEGER])
- OR_MSTF_F = OpMatchSizeTypeFirst([TR_ANY_FLOAT, TR_ANY_FLOAT])
- STORE_RESTRICT = StoreRestrict([None, None, TR_ANY])
- LOAD_RESTRICT = LoadRestrict([])
- GUARD_RESTRICT = GuardRestrict([TR_ANY_INTEGER])
-
- # note that the following definition is x86 arch specific
- MAPPING = {
- rop.VEC_INT_ADD: OR_MSTF_I,
- rop.VEC_INT_SUB: OR_MSTF_I,
- rop.VEC_INT_MUL: OR_MSTF_I,
- rop.VEC_INT_AND: OR_MSTF_I,
- rop.VEC_INT_OR: OR_MSTF_I,
- rop.VEC_INT_XOR: OR_MSTF_I,
- rop.VEC_INT_EQ: OR_MSTF_I,
- rop.VEC_INT_NE: OR_MSTF_I,
-
- rop.VEC_FLOAT_ADD: OR_MSTF_F,
- rop.VEC_FLOAT_SUB: OR_MSTF_F,
- rop.VEC_FLOAT_MUL: OR_MSTF_F,
- rop.VEC_FLOAT_TRUEDIV: OR_MSTF_F,
- rop.VEC_FLOAT_ABS: OpRestrict([TR_ANY_FLOAT]),
- rop.VEC_FLOAT_NEG: OpRestrict([TR_ANY_FLOAT]),
-
- rop.VEC_RAW_STORE: STORE_RESTRICT,
- rop.VEC_SETARRAYITEM_RAW: STORE_RESTRICT,
- rop.VEC_SETARRAYITEM_GC: STORE_RESTRICT,
-
- rop.VEC_RAW_LOAD_I: LOAD_RESTRICT,
- rop.VEC_RAW_LOAD_F: LOAD_RESTRICT,
- rop.VEC_GETARRAYITEM_RAW_I: LOAD_RESTRICT,
- rop.VEC_GETARRAYITEM_RAW_F: LOAD_RESTRICT,
- rop.VEC_GETARRAYITEM_GC_I: LOAD_RESTRICT,
- rop.VEC_GETARRAYITEM_GC_F: LOAD_RESTRICT,
-
- rop.VEC_GUARD_TRUE: GUARD_RESTRICT,
- rop.VEC_GUARD_FALSE: GUARD_RESTRICT,
-
- ## irregular
- rop.VEC_INT_SIGNEXT: OpRestrict([TR_ANY_INTEGER]),
-
- rop.VEC_CAST_FLOAT_TO_SINGLEFLOAT: OpRestrict([TR_DOUBLE_2]),
- # weird but the trace will store single floats in int boxes
- rop.VEC_CAST_SINGLEFLOAT_TO_FLOAT: OpRestrict([TR_INT32_2]),
- rop.VEC_CAST_FLOAT_TO_INT: OpRestrict([TR_DOUBLE_2]),
- rop.VEC_CAST_INT_TO_FLOAT: OpRestrict([TR_INT32_2]),
-
- rop.VEC_FLOAT_EQ: OpRestrict([TR_ANY_FLOAT,TR_ANY_FLOAT]),
- rop.VEC_FLOAT_NE: OpRestrict([TR_ANY_FLOAT,TR_ANY_FLOAT]),
- rop.VEC_INT_IS_TRUE:
OpRestrict([TR_ANY_INTEGER,TR_ANY_INTEGER]),
- }
-
- @staticmethod
- def get(op):
- res = trans.MAPPING.get(op.vector, None)
- if not res:
- failnbail_transformation("could not get OpRestrict for " + str(op))
- return res
-
def turn_into_vector(state, pack):
""" Turn a pack into a vector instruction """
check_if_pack_supported(state, pack)
state.costmodel.record_pack_savings(pack, pack.numops())
left = pack.leftmost()
- oprestrict = trans.get(left)
+ oprestrict = state.cpu.vector_ext.get_operation_restriction(left)
if oprestrict is not None:
oprestrict.check_operation(state, pack, left)
args = left.getarglist_copy()
- prepare_arguments(state, pack, args)
+ prepare_arguments(state, oprestrict, pack, args)
vecop = VecOperation(left.vector, args, left,
pack.numops(), left.getdescr())
for i,node in enumerate(pack.operations):
@@ -449,7 +237,7 @@
state.oplist.append(vecop)
assert vecop.count >= 1
-def prepare_arguments(state, pack, args):
+def prepare_arguments(state, oprestrict, pack, args):
# Transforming one argument to a vector box argument
# The following cases can occur:
# 1) argument is present in the box_to_vbox map.
@@ -461,7 +249,6 @@
# a) expand vars/consts before the label and add as argument
# b) expand vars created in the loop body
#
- oprestrict = trans.MAPPING.get(pack.leftmost().vector, None)
if not oprestrict:
return
restrictions = oprestrict.argument_restrictions
@@ -684,9 +471,8 @@
class VecScheduleState(SchedulerState):
def __init__(self, graph, packset, cpu, costmodel):
- SchedulerState.__init__(self, graph)
+ SchedulerState.__init__(self, cpu, graph)
self.box_to_vbox = {}
- self.cpu = cpu
self.vec_reg_size = cpu.vector_register_size
self.expanded_map = {}
self.costmodel = costmodel
diff --git a/rpython/jit/metainterp/optimizeopt/vector.py
b/rpython/jit/metainterp/optimizeopt/vector.py
--- a/rpython/jit/metainterp/optimizeopt/vector.py
+++ b/rpython/jit/metainterp/optimizeopt/vector.py
@@ -132,7 +132,7 @@
#
start = time.clock()
opt = VectorizingOptimizer(metainterp_sd, jitdriver_sd,
warmstate.vec_cost)
- index_vars = opt.run_optimization(info, loop)
+ index_vars = opt.run_optimization(metainterp_sd, info, loop)
gso = GuardStrengthenOpt(index_vars)
gso.propagate_all_forward(info, loop, user_code)
end = time.clock()
@@ -222,7 +222,7 @@
self.smallest_type_bytes = 0
self.orig_label_args = None
- def run_optimization(self, info, loop):
+ def run_optimization(self, metainterp_sd, info, loop):
self.orig_label_args = loop.label.getarglist_copy()
self.linear_find_smallest_type(loop)
byte_count = self.smallest_type_bytes
@@ -235,7 +235,7 @@
# find index guards and move to the earliest position
graph = self.analyse_index_calculations(loop)
if graph is not None:
- state = SchedulerState(graph)
+ state = SchedulerState(metainterp_sd.cpu, graph)
self.schedule(state) # reorder the trace
# unroll
diff --git a/rpython/jit/metainterp/resoperation.py
b/rpython/jit/metainterp/resoperation.py
--- a/rpython/jit/metainterp/resoperation.py
+++ b/rpython/jit/metainterp/resoperation.py
@@ -1074,11 +1074,9 @@
'_RAW_LOAD_FIRST',
'GETARRAYITEM_GC/2d/rfi',
- 'VEC_GETARRAYITEM_GC/2d/fi',
'GETARRAYITEM_RAW/2d/fi',
- 'VEC_GETARRAYITEM_RAW/2d/fi',
'RAW_LOAD/2d/fi',
- 'VEC_RAW_LOAD/2d/fi',
+ 'VEC_LOAD/2d/fi',
'_RAW_LOAD_LAST',
'GETINTERIORFIELD_GC/2d/rfi',
@@ -1111,11 +1109,9 @@
'INCREMENT_DEBUG_COUNTER/1/n',
'_RAW_STORE_FIRST',
'SETARRAYITEM_GC/3d/n',
- 'VEC_SETARRAYITEM_GC/3d/n',
'SETARRAYITEM_RAW/3d/n',
- 'VEC_SETARRAYITEM_RAW/3d/n',
'RAW_STORE/3d/n',
- 'VEC_RAW_STORE/3d/n',
+ 'VEC_STORE/3d/n',
'_RAW_STORE_LAST',
'SETINTERIORFIELD_GC/3d/n',
'SETINTERIORFIELD_RAW/3d/n', # right now, only used by tests
@@ -1711,19 +1707,19 @@
rop.PTR_NE: rop.PTR_NE,
}
_opvector = {
- rop.RAW_LOAD_I: rop.VEC_RAW_LOAD_I,
- rop.RAW_LOAD_F: rop.VEC_RAW_LOAD_F,
- rop.GETARRAYITEM_RAW_I: rop.VEC_GETARRAYITEM_RAW_I,
- rop.GETARRAYITEM_RAW_F: rop.VEC_GETARRAYITEM_RAW_F,
- rop.GETARRAYITEM_GC_I: rop.VEC_GETARRAYITEM_GC_I,
- rop.GETARRAYITEM_GC_F: rop.VEC_GETARRAYITEM_GC_F,
+ rop.RAW_LOAD_I: rop.VEC_LOAD_I,
+ rop.RAW_LOAD_F: rop.VEC_LOAD_F,
+ rop.GETARRAYITEM_RAW_I: rop.VEC_LOAD_I,
+ rop.GETARRAYITEM_RAW_F: rop.VEC_LOAD_F,
+ rop.GETARRAYITEM_GC_I: rop.VEC_LOAD_I,
+ rop.GETARRAYITEM_GC_F: rop.VEC_LOAD_F,
# note that there is no _PURE operation for vector operations.
# reason: currently we do not care if it is pure or not!
- rop.GETARRAYITEM_GC_PURE_I: rop.VEC_GETARRAYITEM_GC_I,
- rop.GETARRAYITEM_GC_PURE_F: rop.VEC_GETARRAYITEM_GC_F,
- rop.RAW_STORE: rop.VEC_RAW_STORE,
- rop.SETARRAYITEM_RAW: rop.VEC_SETARRAYITEM_RAW,
- rop.SETARRAYITEM_GC: rop.VEC_SETARRAYITEM_GC,
+ rop.GETARRAYITEM_GC_PURE_I: rop.VEC_LOAD_I,
+ rop.GETARRAYITEM_GC_PURE_F: rop.VEC_LOAD_F,
+ rop.RAW_STORE: rop.VEC_STORE,
+ rop.SETARRAYITEM_RAW: rop.VEC_STORE,
+ rop.SETARRAYITEM_GC: rop.VEC_STORE,
rop.INT_ADD: rop.VEC_INT_ADD,
rop.INT_SUB: rop.VEC_INT_SUB,
diff --git a/rpython/jit/metainterp/test/test_vector.py
b/rpython/jit/metainterp/test/test_vector.py
--- a/rpython/jit/metainterp/test/test_vector.py
+++ b/rpython/jit/metainterp/test/test_vector.py
@@ -28,7 +28,11 @@
lltype.free(mem, flavor='raw')
def isclose(a, b, rel_tol=1e-09, abs_tol=0.0):
- return abs(a-b) <= max(rel_tol * max(abs(a), abs(b)), abs_tol)
+ return abs(a-b) <= max(rel_tol * max(abs(a), abs(b)), abs_tol) \
+ or (math.isnan(a) and math.isnan(b)) or \
+ (math.isinf(a) and math.isinf(b) and \
+ (a < 0.0 and b < 0.0) or \
+ (a > 0.0 and b > 0.0))
class RawStorage(object):
def __init__(self):
@@ -84,6 +88,8 @@
type_system=self.type_system,
vec=vec, vec_all=vec_all)
+ # FLOAT UNARY
+
def _vector_float_unary(self, func, type, data):
func = always_inline(func)
@@ -109,10 +115,7 @@
for i in range(l):
c = raw_storage_getitem(type,vc,i*size)
r = func(la[i])
- assert isclose(r, c) or (math.isnan(r) and math.isnan(c)) or \
- (math.isinf(r) and math.isinf(c) and \
- (r < 0.0 and c < 0.0) or \
- (r > 0.0 and c > 0.0))
+ assert isclose(r, c)
rawstorage.clear()
@@ -125,15 +128,14 @@
test_vec_abs_float = \
vec_float_unary(lambda v: abs(v), rffi.DOUBLE)
+ test_vec_neg_float = \
+ vec_float_unary(lambda v: -v, rffi.DOUBLE)
+ # FLOAT BINARY
- @given(data=st.data())
- @pytest.mark.parametrize('func', [lambda a,b: a+b,
- lambda a,b: a*b, lambda a,b: a-b])
- def test_vector_simple_float(self, func, data):
+ def _vector_simple_float(self, func, type, data):
func = always_inline(func)
- type = rffi.DOUBLE
size = rffi.sizeof(rffi.DOUBLE)
myjitdriver = JitDriver(greens = [], reds = 'auto', vectorize=True)
def f(bytecount, va, vb, vc):
@@ -159,15 +161,29 @@
self.meta_interp(f, [l*size, va, vb, vc])
for i in range(l):
+ import pdb; pdb.set_trace()
c = raw_storage_getitem(type,vc,i*size)
r = func(la[i], lb[i])
- assert isclose(r, c) or (math.isnan(r) and math.isnan(c)) or \
- (math.isinf(r) and math.isinf(c) and \
- (r < 0.0 and c < 0.0) or \
- (r > 0.0 and c > 0.0))
+ assert isclose(r, c)
rawstorage.clear()
+ def _vec_float_binary(test_func, func, type):
+ return pytest.mark.parametrize('func,type', [
+ (func, type)
+ ])(given(data=st.data())(test_func))
+
+ vec_float_binary = functools.partial(_vec_float_binary,
_vector_simple_float)
+
+ test_vector_float_add = \
+ vec_float_binary(lambda a,b: a+b, rffi.DOUBLE)
+ test_vector_float_sub = \
+ vec_float_binary(lambda a,b: a-b, rffi.DOUBLE)
+ test_vector_float_mul = \
+ vec_float_binary(lambda a,b: a*b, rffi.DOUBLE)
+ #test_vector_float_div = \
+ # vec_float_binary(lambda a,b: a/b, rffi.DOUBLE)
+
def _vector_simple_int(self, func, type, data):
func = always_inline(func)
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit