Author: Richard Plangger <[email protected]>
Branch: vecopt
Changeset: r77543:bccd719ea178
Date: 2015-05-25 17:01 +0200
http://bitbucket.org/pypy/pypy/changeset/bccd719ea178/
Log: Removed the const arg from each vecop (but not from load); rewrote the
unpacking/packing and gave it a new structure (it has moved down to the
OpToVectorOp class); now displaying more info about the vector box:
v<number>[<type><bits>#<count>]; adjusted tests.
diff --git a/rpython/jit/backend/llgraph/runner.py
b/rpython/jit/backend/llgraph/runner.py
--- a/rpython/jit/backend/llgraph/runner.py
+++ b/rpython/jit/backend/llgraph/runner.py
@@ -673,9 +673,8 @@
# vector operations
vector_arith_code = """
- def bh_vec_{0}_{1}(self, vx, vy, count):
- assert len(vx) == count
- assert len(vy) == count
+ def bh_vec_{0}_{1}(self, vx, vy):
+ assert len(vx) == len(vy)
return [_vx {2} _vy for _vx,_vy in zip(vx,vy)]
"""
exec py.code.Source(vector_arith_code.format('int','add','+')).compile()
@@ -686,9 +685,8 @@
exec py.code.Source(vector_arith_code.format('float','mul','*')).compile()
exec py.code.Source(vector_arith_code.format('float','eq','==')).compile()
- def bh_vec_float_eq(self, vx, vy, count):
- assert len(vx) == count
- assert len(vy) == count
+ def bh_vec_float_eq(self, vx, vy):
+ assert len(vx) == len(vy)
return [_vx == _vy for _vx,_vy in zip(vx,vy)]
def bh_vec_cast_float_to_singlefloat(self, vx):
@@ -706,7 +704,7 @@
def bh_vec_expand(self, x, count):
return [x] * count
- def bh_vec_int_signext(self, vx, ext, count):
+ def bh_vec_int_signext(self, vx, ext):
return [heaptracker.int_signext(_vx, ext) for _vx in vx]
def bh_vec_getarrayitem_raw(self, struct, offset, count, descr):
@@ -715,6 +713,7 @@
val = self.bh_getarrayitem_raw(struct, offset + i, descr)
values.append(val)
return values
+
def bh_vec_raw_load(self, struct, offset, count, descr):
values = []
stride = descr.get_item_size_in_bytes()
@@ -723,13 +722,14 @@
values.append(val)
return values
- def bh_vec_raw_store(self, struct, offset, newvalues, count, descr):
+ def bh_vec_raw_store(self, struct, offset, newvalues, descr):
stride = descr.get_item_size_in_bytes()
- for i in range(count):
- self.bh_raw_store(struct, offset + i*stride, newvalues[i], descr)
- def bh_vec_setarrayitem_raw(self, struct, offset, newvalues, count, descr):
- for i in range(count):
- self.bh_setarrayitem_raw(struct, offset + i, newvalues[i], descr)
+ for i,n in enumerate(newvalues):
+ self.bh_raw_store(struct, offset + i*stride, n, descr)
+
+ def bh_vec_setarrayitem_raw(self, struct, offset, newvalues, descr):
+ for i,n in enumerate(newvalues):
+ self.bh_setarrayitem_raw(struct, offset + i, n, descr)
def store_fail_descr(self, deadframe, descr):
diff --git a/rpython/jit/backend/x86/assembler.py
b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -2705,7 +2705,6 @@
self.mc.CVTDQ2PD(resloc, arglocs[0])
def genop_vec_cast_singlefloat_to_float(self, op, arglocs, resloc):
- loc0, tmploc, indexloc = arglocs
self.mc.CVTPS2PD(resloc, arglocs[0])
# ________________________________________
diff --git a/rpython/jit/backend/x86/regalloc.py
b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -1509,7 +1509,7 @@
consider_vec_raw_store = consider_vec_setarrayitem_raw
def consider_vec_arith(self, op):
- lhs = op.getarg(1)
+ lhs = op.getarg(0)
assert isinstance(lhs, BoxVector)
size = lhs.item_size
args = op.getarglist()
@@ -1526,7 +1526,7 @@
del consider_vec_arith
def consider_vec_logic(self, op):
- lhs = op.getarg(1)
+ lhs = op.getarg(0)
assert isinstance(lhs, BoxVector)
size = lhs.item_size
args = op.getarglist()
@@ -1609,34 +1609,15 @@
def consider_guard_early_exit(self, op):
pass
- def consider_vec_cast_float_to_singlefloat(self, op):
- count = op.getarg(1)
- assert isinstance(count, ConstInt)
+ def consider_vec_cast_float_to_int(self, op):
args = op.getarglist()
- loc0 = self.make_sure_var_in_reg(op.getarg(0), args)
- result = self.xrm.force_result_in_reg(op.result, op.getarg(0), args)
- self.perform(op, [loc0, imm(count.value)], result)
-
- def consider_vec_cast_singlefloat_to_float(self, op):
- index = op.getarg(1)
- assert isinstance(index, ConstInt)
- args = op.getarglist()
- loc0 = self.make_sure_var_in_reg(op.getarg(0), args)
- result = self.force_allocate_reg(op.result, args)
- tmpxvar = TempBox()
- tmploc = self.xrm.force_allocate_reg(tmpxvar)
- self.xrm.possibly_free_var(tmpxvar)
- self.perform(op, [loc0, tmploc, imm(index.value)], result)
-
- def consider_vec_cast_float_to_int(self, op):
- src = op.getarg(0)
- res = op.result
- args = op.getarglist()
- srcloc = self.make_sure_var_in_reg(src, args)
- resloc = self.xrm.force_result_in_reg(res, src, args)
+ srcloc = self.make_sure_var_in_reg(op.getarg(0), args)
+ resloc = self.xrm.force_result_in_reg(op.result, op.getarg(0), args)
self.perform(op, [srcloc], resloc)
consider_vec_cast_int_to_float = consider_vec_cast_float_to_int
+ consider_vec_cast_float_to_singlefloat = consider_vec_cast_float_to_int
+ consider_vec_cast_singlefloat_to_float = consider_vec_cast_float_to_int
# ________________________________________
diff --git a/rpython/jit/backend/x86/test/test_zrpy_vecopt.py
b/rpython/jit/backend/x86/test/test_zrpy_vecopt.py
--- a/rpython/jit/backend/x86/test/test_zrpy_vecopt.py
+++ b/rpython/jit/backend/x86/test/test_zrpy_vecopt.py
@@ -19,11 +19,6 @@
if kwds['jit']:
apply_jit(t, vectorize=True)
- #cbuilder = genc.CStandaloneBuilder(t, f, t.config)
- #cbuilder.generate_source(defines=cbuilder.DEBUG_DEFINES)
- #cbuilder.compile()
- #return cbuilder
-
class TestVecOptX86(object):
def test_translate(self):
jd = JitDriver(greens = [], reds = 'auto', vectorize=True)
diff --git a/rpython/jit/metainterp/history.py
b/rpython/jit/metainterp/history.py
--- a/rpython/jit/metainterp/history.py
+++ b/rpython/jit/metainterp/history.py
@@ -390,6 +390,9 @@
except AttributeError:
t = 'b'
self._str = '%s%d' % (t, Box._counter)
+ if self.type == VECTOR:
+ self._str += '[%s%d#%d]' % (self.item_type, self.item_size * 8,
+ self.item_count)
Box._counter += 1
return self._str
diff --git a/rpython/jit/metainterp/logger.py b/rpython/jit/metainterp/logger.py
--- a/rpython/jit/metainterp/logger.py
+++ b/rpython/jit/metainterp/logger.py
@@ -127,7 +127,7 @@
elif isinstance(arg, BoxFloat):
return 'f' + str(mv)
elif isinstance(arg, BoxVector):
- return 'v' + str(mv)
+ return 'v%s[%s%d#%d]' % (str(mv), arg.item_type, arg.item_size,
arg.item_count)
elif arg is None:
return 'None'
else:
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
@@ -872,12 +872,12 @@
i12 = int_add(i1, {stride})
v1 = vec_getarrayitem_raw(p0, i0, 2, descr={descr}arraydescr)
v2 = vec_getarrayitem_raw(p1, i0, 2, descr={descr}arraydescr)
- v3 = {op}(v1,v2,2)
- vec_setarrayitem_raw(p2, i0, v3, 2, descr={descr}arraydescr)
+ v3 = {op}(v1,v2)
+ vec_setarrayitem_raw(p2, i0, v3, descr={descr}arraydescr)
jump(p0,p1,p2,i12)
""".format(op='vec_'+op,descr=descr,stride=1)
loop = self.parse_loop(ops)
- vopt = self.schedule(loop,1)
+ vopt = self.schedule(loop, 1)
self.assert_equal(loop, self.parse_loop(vops))
def test_vschedule_trace_1(self):
@@ -907,8 +907,8 @@
guard_true(i18) []
v19 = vec_raw_load(i2, i6, 2, descr=intarraydescr)
v20 = vec_raw_load(i3, i6, 2, descr=intarraydescr)
- v21 = vec_int_add(v19, v20, 2)
- vec_raw_store(i4, i6, v21, 2, descr=intarraydescr)
+ v21 = vec_int_add(v19, v20)
+ vec_raw_store(i4, i6, v21, descr=intarraydescr)
jump(i13, i1, i2, i3, i4)
"""
vopt = self.schedule(self.parse_loop(ops),1)
@@ -925,8 +925,8 @@
jump(p0,i2)
"""
dead_code = '\n '.join([
- "i{t1} = int_add(i{t},1)\n i{s} = int_lt(i{t1}, 102)".format(
- i=i+1, t1=i+201, t=i+200, s=i+20)
+ "i{t1} = int_add(i0,{i})\n i{s} = int_lt(i{t1}, 102)".format(
+ i=i+2, t1=i+201, t=i+200, s=i+20)
for i in range(0,14)])
opt="""
[p0,i0]
@@ -986,7 +986,7 @@
i5 = int_lt(i4, 10)
v1 = vec_getarrayitem_raw(p0, i0, 2, descr=floatarraydescr)
v3 = vec_int_expand(42)
- v2 = vec_int_mul(v1, v3, 2)
+ v2 = vec_int_mul(v1, v3)
jump(p0,i4)
"""
vopt = self.vectorize(self.parse_loop(ops),1)
@@ -1015,7 +1015,7 @@
i5 = int_lt(i4, 10)
v1 = vec_getarrayitem_raw(p0, i0, 2, descr=floatarraydescr)
v3 = vec_float_expand(f3)
- v2 = vec_int_mul(v1, v3, 2)
+ v2 = vec_int_mul(v1, v3)
jump(p0,i4,f3)
"""
vopt = self.vectorize(self.parse_loop(ops),1)
@@ -1047,20 +1047,21 @@
i48 = int_add(i41, 8)
i51 = int_add(i37, 8)
i52 = int_ge(i50, i18)
- guard_false(i52) [p38, p12, p9, p14, p39, i37, i44, f35, i40, p42,
i43, f34, i28, p36, i41]
- i55 = int_add(i46, 8)
- i54 = int_add(i48, 8)
- i56 = int_add(i51, 8)
- i53 = int_add(i50, 1)
- i57 = int_ge(i53, i18)
- guard_false(i57) [p38, p12, p9, p14, p39, i37, i44, f35, i40, p42,
i43, f34, i28, p36, i41]
+ i637 = int_add(i28, 2)
+ i638 = int_ge(i637, i18)
+ guard_false(i638) [p38, p12, p9, p14, p39, i37, i44, f35, i40, p42,
i43, f34, i28, p36, i41]
+ i55 = int_add(i44, 16)
+ i54 = int_add(i41, 16)
+ i56 = int_add(i37, 16)
+ i629 = same_as(i637)
+ i57 = int_ge(i629, i18)
v61 = vec_raw_load(i21, i44, 2, descr=floatarraydescr)
v62 = vec_raw_load(i4, i41, 2, descr=floatarraydescr)
- v63 = vec_float_add(v61, v62, 2)
- vec_raw_store(i0, i37, v63, 2, descr=floatarraydescr)
+ v63 = vec_float_add(v61, v62)
+ vec_raw_store(i0, i37, v63, descr=floatarraydescr)
f100 = vec_float_unpack(v61, 1, 1)
f101 = vec_float_unpack(v62, 1, 1)
- jump(p36, i53, p9, i56, p14, f100, p12, p38, f101, p39, i40, i54, p42,
i43, i55, i21, i4, i0, i18)
+ jump(p36, i629, p9, i56, p14, f100, p12, p38, f101, p39, i40, i54,
p42, i43, i55, i21, i4, i0, i18)
"""
vopt = self.vectorize(self.parse_loop(ops))
self.assert_equal(vopt.loop, self.parse_loop(opt))
@@ -1072,7 +1073,7 @@
f1 = getarrayitem_raw(p0, i1, descr=floatarraydescr)
i2 = cast_float_to_singlefloat(f1)
setarrayitem_raw(p1, i1, i2, descr=singlefloatarraydescr)
- i3 = int_sub(i1, 1)
+ i3 = int_add(i1, 1)
i4 = int_ge(i3, 36)
guard_false(i4) []
jump(p0, p1, i3)
@@ -1080,23 +1081,23 @@
opt = """
[p0, p1, i1]
guard_early_exit() []
- i3 = int_sub(i1, 1)
+ i3 = int_add(i1, 1)
i4 = int_ge(i3, 36)
- i50 = int_add(i1, -4)
+ i50 = int_add(i1, 4)
i51 = int_ge(i50, 36)
guard_false(i51) []
- i5 = int_sub(i3, 1)
+ i5 = int_add(i1, 2)
i8 = int_ge(i5, 36)
- i6 = int_sub(i5, 1)
+ i6 = int_add(i1, 3)
i11 = int_ge(i6, 36)
i7 = same_as(i50)
i14 = int_ge(i7, 36)
v17 = vec_getarrayitem_raw(p0, i1, 2, descr=floatarraydescr)
v18 = vec_getarrayitem_raw(p0, i5, 2, descr=floatarraydescr)
- v19 = vec_cast_float_to_singlefloat(v17, 2)
- v20 = vec_cast_float_to_singlefloat(v18, 2)
+ v19 = vec_cast_float_to_singlefloat(v17)
+ v20 = vec_cast_float_to_singlefloat(v18)
v21 = vec_float_pack(v19, v20, 2, 2)
- vec_setarrayitem_raw(p1, i1, v21, 4, descr=singlefloatarraydescr)
+ vec_setarrayitem_raw(p1, i1, v21, descr=singlefloatarraydescr)
jump(p0, p1, i7)
"""
vopt = self.vectorize(self.parse_loop(ops))
@@ -1116,7 +1117,7 @@
raw_store(p2, i4, i12, descr=singlefloatarraydescr)
i5 = int_add(i4, 4)
i186 = int_lt(i5, 100)
- guard_false(i186) []
+ guard_true(i186) []
jump(p0,p1,p2,i1,i5)
"""
opt = """
@@ -1127,30 +1128,30 @@
i186 = int_lt(i5, 100)
i500 = int_add(i4, 16)
i501 = int_lt(i500, 100)
- guard_false(i501) []
- i189 = int_add(i1, 4)
- i187 = int_add(i5, 4)
- i198 = int_add(i189, 4)
+ guard_true(i501) []
+ i189 = int_add(i0, 8)
+ i187 = int_add(i4, 8)
+ i198 = int_add(i0, 12)
i188 = int_lt(i187, 100)
- i207 = int_add(i198, 4)
- i196 = int_add(i187, 4)
+ i207 = int_add(i0, 16)
+ i196 = int_add(i4, 12)
i197 = int_lt(i196, 100)
i205 = same_as(i500)
i206 = int_lt(i205, 100)
v228 = vec_raw_load(p0, i0, 4, descr=singlefloatarraydescr)
- v229 = vec_cast_singlefloat_to_float(v228, 2)
+ v229 = vec_cast_singlefloat_to_float(v228)
v230 = vec_int_unpack(v228, 2, 2)
- v231 = vec_cast_singlefloat_to_float(v230, 2)
+ v231 = vec_cast_singlefloat_to_float(v230)
v232 = vec_raw_load(p1, i1, 4, descr=singlefloatarraydescr)
- v233 = vec_cast_singlefloat_to_float(v232, 2)
+ v233 = vec_cast_singlefloat_to_float(v232)
v234 = vec_int_unpack(v232, 2, 2)
- v235 = vec_cast_singlefloat_to_float(v234, 2)
- v236 = vec_float_add(v229, v233, 2)
- v237 = vec_float_add(v231, v235, 2)
- v238 = vec_cast_float_to_singlefloat(v236, 2)
- v239 = vec_cast_float_to_singlefloat(v237, 2)
+ v235 = vec_cast_singlefloat_to_float(v234)
+ v236 = vec_float_add(v229, v233)
+ v237 = vec_float_add(v231, v235)
+ v238 = vec_cast_float_to_singlefloat(v236)
+ v239 = vec_cast_float_to_singlefloat(v237)
v240 = vec_float_pack(v238, v239, 2, 2)
- vec_raw_store(p2, i4, v240, 4, descr=singlefloatarraydescr)
+ vec_raw_store(p2, i4, v240, descr=singlefloatarraydescr)
jump(p0, p1, p2, i207, i205)
"""
vopt = self.vectorize(self.parse_loop(ops))
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py
b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -62,8 +62,7 @@
from rpython.rtyper.lltypesystem.lloperation import llop
llop.debug_print_traceback(lltype.Void)
else:
- import py
- py.test.set_trace()
+ raise
finally:
debug_stop("vec-opt-loop")
@@ -310,7 +309,6 @@
if memref_a.is_adjacent_to(memref_b):
if self.packset.can_be_packed(node_a, node_b):
pair = Pair(node_a,node_b)
- pair.ptype =
PackType.by_descr(node_a.getoperation().getdescr())
self.packset.packs.append(pair)
def extend_packset(self):
@@ -498,7 +496,6 @@
self.stronger = False
def implies(self, guard, opt):
- #print self.cmp_op, "=>", guard.cmp_op, "?"
if self.op.getopnum() != guard.op.getopnum():
return False
@@ -509,8 +506,6 @@
# same operation
lc = self.compare(self.lhs, guard.lhs)
rc = self.compare(self.rhs, guard.rhs)
- #print "compare", self.lhs, guard.lhs, lc
- #print "compare", self.rhs, guard.rhs, rc
opnum = self.get_compare_opnum()
if opnum == -1:
return False
@@ -719,11 +714,12 @@
return self.count
@staticmethod
- def by_descr(descr):
+ def by_descr(descr, vec_reg_size):
_t = INT
if descr.is_array_of_floats() or descr.concrete_type == FLOAT:
_t = FLOAT
- pt = PackType(_t, descr.get_item_size_in_bytes(),
descr.is_item_signed())
+ size = descr.get_item_size_in_bytes()
+ pt = PackType(_t, size, descr.is_item_signed(), vec_reg_size // size)
return pt
def is_valid(self):
@@ -732,206 +728,117 @@
def new_vector_box(self, count):
return BoxVector(self.type, count, self.size, self.signed)
- def record_vbox(self, vbox):
- if self.type == PackType.UNKNOWN_TYPE:
- self.type = vbox.item_type
- assert self.type in (FLOAT, INT)
- self.signed = vbox.signed
- if vbox.item_size > self.size:
- self.size = vbox.item_size
+ def __repr__(self):
+ return 'PackType(%s, %d, %d, #%d)' % (self.type, self.size,
self.signed, self.count)
- def __repr__(self):
- return 'PackType(%s, %s, %s)' % (self.type, self.size, self.signed)
+ @staticmethod
+ def of(box, count=-1):
+ assert isinstance(box, BoxVector)
+ if count == -1:
+ count = box.item_count
+ return PackType(box.item_type, box.item_size, box.signed, count)
def clone(self):
- return PackType(self.type, self.size, self.signed)
+ return PackType(self.type, self.size, self.signed, self.count)
class OpToVectorOp(object):
- def __init__(self, arg_ptypes, result_ptype, has_ptype=False,
result_vsize_arg=-1):
+ def __init__(self, arg_ptypes, result_ptype, has_descr=False,
+ arg_clone_ptype=0,
+ needs_count_in_params=False):
self.arg_ptypes = list(arg_ptypes) # do not use a tuple. rpython
cannot union
self.result_ptype = result_ptype
- self.has_ptype = has_ptype
- self.result_vsize_arg = result_vsize_arg
+ self.has_descr = has_descr
+ self.arg_clone_ptype = arg_clone_ptype
+ self.needs_count_in_params = needs_count_in_params
+ self.preamble_ops = None
+ self.sched_data = None
- def has_result(self):
- return self.result_ptype is not None
-
- def get_result_ptype(self):
- return self.result_ptype
-
- def get_arg_ptype(self, i):
- if i < 0 or i >= len(self.arg_ptypes):
- return None
- return self.arg_ptypes[i]
-
- def vector_arg(self, i):
+ def is_vector_arg(self, i):
if i < 0 or i >= len(self.arg_ptypes):
return False
return self.arg_ptypes[i] is not None
-PT_FLOAT = PackType(FLOAT, 4, False)
-PT_FLOAT_2 = PackType(FLOAT, 4, False, count=2)
-PT_DOUBLE = PackType(FLOAT, 8, False)
-PT_INT_GENERIC = PackType(INT, -1, True)
-PT_INT64 = PackType(INT, 8, True)
-PT_INT32 = PackType(INT, 4, True)
-PT_INT32_2 = PackType(INT, 4, True, count=2)
-PT_FLOAT_GENERIC = PackType(INT, -1, True)
-PT_GENERIC = PackType(PackType.UNKNOWN_TYPE, -1, True)
+ def pack_ptype(self, op):
+ opnum = op.vector
+ args = op.getarglist()
+ result = op.result
+ if self.has_descr:
+ descr = op.getdescr()
+ return PackType.by_descr(descr, self.sched_data.vec_reg_size)
+ if self.arg_clone_ptype >= 0:
+ arg = args[self.arg_clone_ptype]
+ _, vbox = self.sched_data.box_to_vbox.get(arg, (-1, None))
+ if vbox:
+ return PackType.of(vbox)
-ROP_ARG_RES_VECTOR = {
- rop.VEC_INT_ADD: OpToVectorOp((PT_INT_GENERIC, PT_INT_GENERIC),
PT_INT_GENERIC),
- rop.VEC_INT_SUB: OpToVectorOp((PT_INT_GENERIC, PT_INT_GENERIC),
PT_INT_GENERIC),
- rop.VEC_INT_MUL: OpToVectorOp((PT_INT_GENERIC, PT_INT_GENERIC),
PT_INT_GENERIC),
- rop.VEC_INT_AND: OpToVectorOp((PT_INT_GENERIC, PT_INT_GENERIC),
PT_INT_GENERIC),
- rop.VEC_INT_OR: OpToVectorOp((PT_INT_GENERIC, PT_INT_GENERIC),
PT_INT_GENERIC),
- rop.VEC_INT_XOR: OpToVectorOp((PT_INT_GENERIC, PT_INT_GENERIC),
PT_INT_GENERIC),
- rop.VEC_INT_SIGNEXT: OpToVectorOp((PT_INT_GENERIC,), PT_INT_GENERIC,
result_vsize_arg=1),
+ def as_vector_operation(self, pack, sched_data, oplist):
+ self.sched_data = sched_data
+ self.preamble_ops = oplist
+ op0 = pack.operations[0].getoperation()
+ self.ptype = self.pack_ptype(op0)
- rop.VEC_FLOAT_ADD: OpToVectorOp((PT_FLOAT_GENERIC,PT_FLOAT_GENERIC),
PT_FLOAT_GENERIC),
- rop.VEC_FLOAT_SUB: OpToVectorOp((PT_FLOAT_GENERIC,PT_FLOAT_GENERIC),
PT_FLOAT_GENERIC),
- rop.VEC_FLOAT_MUL: OpToVectorOp((PT_FLOAT_GENERIC,PT_FLOAT_GENERIC),
PT_FLOAT_GENERIC),
- rop.VEC_FLOAT_EQ: OpToVectorOp((PT_FLOAT_GENERIC,PT_FLOAT_GENERIC),
PT_INT_GENERIC),
+ off = 0
+ stride = self.split_pack(pack)
+ while off < len(pack.operations):
+ ops = pack.operations[off:off+stride]
+ self.transform_pack(ops, off, stride)
+ off += stride
- rop.VEC_RAW_LOAD: OpToVectorOp((), PT_GENERIC, has_ptype=True),
- rop.VEC_GETARRAYITEM_RAW: OpToVectorOp((), PT_GENERIC, has_ptype=True),
- rop.VEC_RAW_STORE: OpToVectorOp((None,None,PT_GENERIC,), None,
has_ptype=True),
- rop.VEC_SETARRAYITEM_RAW: OpToVectorOp((None,None,PT_GENERIC,), None,
has_ptype=True),
+ self.preamble_ops = None
+ self.sched_data = None
+ self.ptype = None
- rop.VEC_CAST_FLOAT_TO_SINGLEFLOAT: OpToVectorOp((PT_DOUBLE,), PT_FLOAT_2),
- rop.VEC_CAST_SINGLEFLOAT_TO_FLOAT: OpToVectorOp((PT_FLOAT_2,), PT_DOUBLE),
- rop.VEC_CAST_FLOAT_TO_INT: OpToVectorOp((PT_DOUBLE,), PT_INT32_2),
- rop.VEC_CAST_INT_TO_FLOAT: OpToVectorOp((PT_INT32_2,), PT_DOUBLE),
-}
+ def split_pack(self, pack):
+ pack_count = len(pack.operations)
+ vec_reg_size = self.sched_data.vec_reg_size
+ if pack_count * self.ptype.getsize() > vec_reg_size:
+ return vec_reg_size // self.ptype.getsize()
+ return pack_count
-
-class VecScheduleData(SchedulerData):
- def __init__(self, vec_reg_size):
- self.box_to_vbox = {}
- self.unpack_rename_map = {}
- self.preamble_ops = None
- self.expansion_byte_count = -1
- self.vec_reg_size = vec_reg_size
- self.pack_ops = -1
- self.pack_off = -1
-
- def unpack_rename(self, arg):
- return self.unpack_rename_map.get(arg, arg)
-
- def rename_unpacked(self, arg, argdest):
- self.unpack_rename_map[arg] = argdest
-
- def as_vector_operation(self, pack):
- op_count = len(pack.operations)
- assert op_count > 1
- self.pack = pack
- # properties that hold for the pack are:
- # + isomorphism (see func above)
- # + tight packed (no room between vector elems)
- if pack.ptype is None:
- self.propagate_ptype()
-
- self.preamble_ops = []
- if pack.is_overloaded(self.vec_reg_size):
- self.preamble_ops = []
- stride = pack.size_in_bytes() // self.vec_reg_size
- for i in range(0, op_count, stride):
- self.pack_off = i
- self.pack_ops = stride
- self._as_vector_op()
- return self.preamble_ops
- else:
- self.pack_off = 0
- self.pack_ops = op_count
- self._as_vector_op()
- return self.preamble_ops
-
- def _as_vector_op(self):
- op0 = self.pack.operations[self.pack_off].getoperation()
- assert op0.vector != -1
- args = op0.getarglist()[:]
-
- tovector = ROP_ARG_RES_VECTOR.get(op0.vector, None)
- if tovector is None:
- raise NotImplementedError("vecop map entry missing. trans: pack ->
vop")
-
- args.append(ConstInt(self.pack_ops))
- vop = ResOperation(op0.vector, args, op0.result, op0.getdescr())
-
+ def transform_pack(self, ops, off, stride):
+ op = ops[0].getoperation()
+ args = op.getarglist()
+ if self.needs_count_in_params:
+ args.append(ConstInt(len(ops)))
+ result = op.result
+ descr = op.getdescr()
for i,arg in enumerate(args):
- arg_ptype = tovector.get_arg_ptype(i)
- if arg_ptype and tovector.has_ptype:
- arg_ptype = self.pack.ptype
- if arg_ptype is not None:
- if arg_ptype.size == -1:
- arg_ptype = self.pack.ptype
- self.vector_arg(vop, i, arg_ptype)
- if tovector.has_result():
- self.vector_result(vop, tovector)
-
+ if self.is_vector_arg(i):
+ args[i] = self.transform_argument(ops, args[i], i, off, stride)
+ #
+ result = self.transform_result(ops, result, off)
+ #
+ vop = ResOperation(op.vector, args, result, descr)
self.preamble_ops.append(vop)
- def propagate_ptype(self):
- op0 = self.pack.operations[0].getoperation()
- tovector = ROP_ARG_RES_VECTOR.get(op0.vector, None)
- if tovector is None:
- raise NotImplementedError("vecop map entry missing. trans: pack ->
vop")
- if tovector.has_ptype:
- assert False, "load/store must have ptypes attached from the
descriptor"
- args = op0.getarglist()[:]
- res_ptype = tovector.get_result_ptype().clone()
- for i,arg in enumerate(args):
- if tovector.vector_arg(i):
- _, vbox = self.box_to_vbox.get(arg, (-1, None))
- if vbox is not None:
- res_ptype.record_vbox(vbox)
- else:
- # vbox of a variable/constant is not present here
- pass
- self.pack.ptype = res_ptype
+ def transform_result(self, ops, result, off):
+ if result is None:
+ return None
+ vbox = self.new_result_vector_box()
+ #
+ # mark the position and the vbox in the hash
+ for i, node in enumerate(ops):
+ op = node.getoperation()
+ self.sched_data.setvector_of_box(op.result, i, vbox)
+ return vbox
- def vector_result(self, vop, tovector):
- ops = self.pack.operations
- ptype = tovector.get_result_ptype().clone()
- if tovector.has_ptype:
- ptype = self.pack.ptype
- count = -1
- if tovector.result_vsize_arg != -1:
- # vec_int_signext specifies the size in bytes on the
- # first argument.
- arg = vop.getarg(tovector.result_vsize_arg)
- assert isinstance(arg, ConstInt)
- count = arg.value
- else:
- count = self.pack_ops
- if ptype is not None:
- if ptype.size == -1:
- ptype.size = self.pack.ptype.size
- vbox = ptype.new_vector_box(count)
- else:
- vbox = self.pack.ptype.new_vector_box(count)
- #
- vop.result = vbox
- i = self.pack_off
- off = 0 # XXX assumption. the result is always placed at index
[0,...,x]
- end = i + self.pack_ops
- while i < end:
- op = ops[i].getoperation()
- self.box_in_vector(op.result, off, vbox)
- i += 1
- off += 1
+ def new_result_vector_box(self):
+ size = self.ptype.getsize()
+ count = self.ptype.getcount()
+ return BoxVector(self.ptype.gettype(), count, size, self.ptype.signed)
- def box_in_vector(self, box, off, vector):
- self.box_to_vbox[box] = (off, vector)
+ def transform_argument(self, ops, arg, argidx, off, count):
+ box_pos, vbox = self.sched_data.getvector_of_box(arg)
+ if not vbox:
+ # constant/variable expand this box
+ vbox = self.ptype.new_vector_box(count)
+ vbox = self.expand_box_to_vector_box(vbox, ops, arg, argidx)
+ box_pos = 0
- def vector_arg(self, vop, argidx, arg_ptype):
- ops = self.pack.operations
- _, vbox = self.box_to_vbox.get(vop.getarg(argidx), (-1, None))
- if not vbox:
- vbox = self.expand_box_to_vector_box(vop, argidx)
- # vbox is a primitive type mixin
- packable = self.vec_reg_size // arg_ptype.getsize()
+ # use the input as an indicator for the pack type
+ arg_ptype = PackType.of(vbox)
+ packable = self.sched_data.vec_reg_size // arg_ptype.getsize()
packed = vbox.item_count
assert packed >= 0
assert packable >= 0
@@ -941,26 +848,22 @@
vbox = self._pack(vbox, packed, args, packable)
elif packed > packable:
# the argument has more items than the operation is able to
process!
- vbox = self.unpack(vbox, self.pack_off, packable, arg_ptype)
+ vbox = self.unpack(vbox, off, packable, arg_ptype)
vbox = self.extend(vbox, arg_ptype)
-
+ # continue to handle the rest of the vbox
+ #
# The instruction takes less items than the vector has.
- # Unpack if not at pack_off 0
- count = arg_ptype.getcount()
- if count != -1 and count < vbox.item_count:
- if self.pack_off == 0:
- pass # right place already
- else:
- vbox = self.unpack(vbox, self.pack_off, count, arg_ptype)
-
- vop.setarg(argidx, vbox)
+ # Unpack if not at off 0
+ if off != 0 and box_pos != 0:
+ vbox = self.unpack(vbox, off, count, arg_ptype)
+ #
return vbox
def extend(self, vbox, arg_ptype):
- if vbox.item_count * vbox.item_size == self.vec_reg_size:
+ if vbox.item_count * vbox.item_size == self.sched_data.vec_reg_size:
return vbox
size = arg_ptype.getsize()
- assert (vbox.item_count * size) == self.vec_reg_size
+ assert (vbox.item_count * size) == self.sched_data.vec_reg_size
opnum = rop.VEC_INT_SIGNEXT
vbox_cloned = arg_ptype.new_vector_box(vbox.item_count)
op = ResOperation(opnum, [vbox, ConstInt(size),
ConstInt(vbox.item_count)], vbox_cloned)
@@ -991,7 +894,7 @@
i = index
while i < arg_count and tgt_box.item_count < packable:
arg = args[i]
- pos, src_box = self.box_to_vbox.get(arg, (-1, None))
+ pos, src_box = self.sched_data.getvector_of_box(arg)
if pos == -1:
i += 1
continue
@@ -1007,8 +910,9 @@
# at a new position
for j in range(i):
arg = args[j]
- self.box_in_vector(arg, j, new_box)
- _, vbox = self.box_to_vbox.get(args[0], (-1, None))
+ self.sched_data.setvector_of_box(arg, j, new_box)
+ tgt_box = new_box
+ _, vbox = self.sched_data.getvector_of_box(args[0])
return vbox
def _check_vec_pack(self, op):
@@ -1026,18 +930,13 @@
assert arg1.item_size == result.item_size
else:
assert count.value == 1
- assert index.value < result.item_size
- assert index.value + count.value <= result.item_size
+ assert index.value < result.item_count
+ assert index.value + count.value <= result.item_count
assert result.item_count > arg0.item_count
- def expand_box_to_vector_box(self, vop, argidx):
- arg = vop.getarg(argidx)
+ def expand_box_to_vector_box(self, vbox, ops, arg, argidx):
all_same_box = True
- ops = self.pack.operations
- i = self.pack_off
- end = i + self.pack_ops
- while i < end:
- op = ops[i]
+ for i, op in enumerate(ops):
if arg is not op.getoperation().getarg(argidx):
all_same_box = False
break
@@ -1050,8 +949,6 @@
if box_type == INT:
expand_opnum = rop.VEC_INT_EXPAND
- # TODO
- vbox = BoxVector(box_type, self.pack_ops)
if all_same_box:
expand_op = ResOperation(expand_opnum, [arg], vbox)
self.preamble_ops.append(expand_op)
@@ -1068,6 +965,142 @@
self.preamble_ops.append(resop)
return vbox
+class OpToVectorOpConv(OpToVectorOp):
+ def __init__(self, intype, outtype):
+ OpToVectorOp.__init__(self, (intype,), outtype)
+ self.from_size = intype.getsize()
+ self.to_size = outtype.getsize()
+
+ def split_pack(self, pack):
+ if self.from_size > self.to_size:
+ # cast down
+ return OpToVectorOp.split_pack(self, pack)
+ op0 = pack.operations[0].getoperation()
+ _, vbox = self.sched_data.getvector_of_box(op0.getarg(0))
+ vec_reg_size = self.sched_data.vec_reg_size
+ if vbox.getcount() * self.to_size > vec_reg_size:
+ return vec_reg_size // self.to_size
+ return len(pack.operations)
+
+ def new_result_vector_box(self):
+ size = self.to_size
+ count = self.ptype.getcount()
+ vec_reg_size = self.sched_data.vec_reg_size
+ if count * size > vec_reg_size:
+ count = vec_reg_size // size
+ return BoxVector(self.result_ptype.gettype(), count, size,
self.ptype.signed)
+
+class SignExtToVectorOp(OpToVectorOp):
+ def __init__(self, intype, outtype):
+ OpToVectorOp.__init__(self, (intype,), outtype)
+ self.size = -1
+
+ def split_pack(self, pack):
+ op0 = pack.operations[0].getoperation()
+ sizearg = op0.getarg(1)
+ assert isinstance(sizearg, ConstInt)
+ self.size = sizearg.value
+ if self.ptype.getsize() > self.size:
+ # cast down
+ return OpToVectorOp.split_pack(self, pack)
+ _, vbox = self.sched_data.getvector_of_box(op0.getarg(0))
+ vec_reg_size = self.sched_data.vec_reg_size
+ if vbox.getcount() * self.size > vec_reg_size:
+ return vec_reg_size // self.to_size
+ return vbox.getcount()
+
+ def new_result_vector_box(self):
+ count = self.ptype.getcount()
+ vec_reg_size = self.sched_data.vec_reg_size
+ if count * self.size > vec_reg_size:
+ count = vec_reg_size // self.size
+ return BoxVector(self.result_ptype.gettype(), count, self.size,
self.ptype.signed)
+
+
+PT_FLOAT = PackType(FLOAT, 4, False)
+PT_DOUBLE = PackType(FLOAT, 8, False)
+PT_FLOAT_GENERIC = PackType(INT, -1, True)
+PT_INT64 = PackType(INT, 8, True)
+PT_INT32 = PackType(INT, 4, True)
+PT_INT_GENERIC = PackType(INT, -1, True)
+PT_GENERIC = PackType(PackType.UNKNOWN_TYPE, -1, True)
+
+INT_RES = PT_INT_GENERIC
+FLOAT_RES = PT_FLOAT_GENERIC
+LOAD_RES = PT_GENERIC
+
+ROP_ARG_RES_VECTOR = {
+ rop.VEC_INT_ADD: OpToVectorOp((PT_INT_GENERIC, PT_INT_GENERIC),
INT_RES),
+ rop.VEC_INT_SUB: OpToVectorOp((PT_INT_GENERIC, PT_INT_GENERIC),
INT_RES),
+ rop.VEC_INT_MUL: OpToVectorOp((PT_INT_GENERIC, PT_INT_GENERIC),
INT_RES),
+ rop.VEC_INT_AND: OpToVectorOp((PT_INT_GENERIC, PT_INT_GENERIC),
INT_RES),
+ rop.VEC_INT_OR: OpToVectorOp((PT_INT_GENERIC, PT_INT_GENERIC),
INT_RES),
+ rop.VEC_INT_XOR: OpToVectorOp((PT_INT_GENERIC, PT_INT_GENERIC),
INT_RES),
+
+ rop.VEC_INT_SIGNEXT: SignExtToVectorOp((PT_INT_GENERIC,), INT_RES),
+
+ rop.VEC_FLOAT_ADD: OpToVectorOp((PT_FLOAT_GENERIC,PT_FLOAT_GENERIC),
FLOAT_RES),
+ rop.VEC_FLOAT_SUB: OpToVectorOp((PT_FLOAT_GENERIC,PT_FLOAT_GENERIC),
FLOAT_RES),
+ rop.VEC_FLOAT_MUL: OpToVectorOp((PT_FLOAT_GENERIC,PT_FLOAT_GENERIC),
FLOAT_RES),
+ rop.VEC_FLOAT_EQ: OpToVectorOp((PT_FLOAT_GENERIC,PT_FLOAT_GENERIC),
INT_RES),
+
+ rop.VEC_RAW_LOAD: OpToVectorOp((), LOAD_RES, has_descr=True,
+ arg_clone_ptype=-2,
+ needs_count_in_params=True
+ ),
+ rop.VEC_GETARRAYITEM_RAW: OpToVectorOp((), LOAD_RES,
+ has_descr=True,
+ arg_clone_ptype=-2,
+ needs_count_in_params=True
+ ),
+ rop.VEC_RAW_STORE: OpToVectorOp((None,None,PT_GENERIC,), None,
has_descr=True, arg_clone_ptype=2),
+ rop.VEC_SETARRAYITEM_RAW: OpToVectorOp((None,None,PT_GENERIC,), None,
has_descr=True, arg_clone_ptype=2),
+
+ rop.VEC_CAST_FLOAT_TO_SINGLEFLOAT: OpToVectorOpConv(PT_DOUBLE, PT_FLOAT),
+ rop.VEC_CAST_SINGLEFLOAT_TO_FLOAT: OpToVectorOpConv(PT_FLOAT, PT_DOUBLE),
+ rop.VEC_CAST_FLOAT_TO_INT: OpToVectorOpConv(PT_DOUBLE, PT_INT32),
+ rop.VEC_CAST_INT_TO_FLOAT: OpToVectorOpConv(PT_INT32, PT_DOUBLE),
+}
+
+class VecScheduleData(SchedulerData):
+ def __init__(self, vec_reg_size):
+ self.box_to_vbox = {}
+ self.unpack_rename_map = {}
+ self.preamble_ops = None
+ self.expansion_byte_count = -1
+ self.vec_reg_size = vec_reg_size
+ self.pack_ops = -1
+ self.pack_off = -1
+
+ def unpack_rename(self, arg):
+ return self.unpack_rename_map.get(arg, arg)
+
+ def rename_unpacked(self, arg, argdest):
+ self.unpack_rename_map[arg] = argdest
+
+ def as_vector_operation(self, pack):
+ op_count = len(pack.operations)
+ assert op_count > 1
+ self.pack = pack
+ # properties that hold for the pack are:
+ # + isomorphism (see func above)
+ # + tight packed (no room between vector elems)
+
+ op0 = pack.operations[0].getoperation()
+ tovector = ROP_ARG_RES_VECTOR.get(op0.vector, None)
+ if tovector is None:
+ raise NotImplementedError("vecop map entry missing. trans: pack ->
vop")
+ oplist = []
+ tovector.as_vector_operation(pack, self, oplist)
+ return oplist
+
+ def getvector_of_box(self, arg):
+ return self.box_to_vbox.get(arg, (-1, None))
+
+ def setvector_of_box(self, box, off, vector):
+ self.box_to_vbox[box] = (off, vector)
+
+
def isomorphic(l_op, r_op):
""" Same instructions have the same operation name.
TODO what about parameters?
diff --git a/rpython/jit/metainterp/resoperation.py
b/rpython/jit/metainterp/resoperation.py
--- a/rpython/jit/metainterp/resoperation.py
+++ b/rpython/jit/metainterp/resoperation.py
@@ -453,26 +453,26 @@
# vector operations
'_VEC_PURE_FIRST',
'_VEC_ARITHMETIC_FIRST',
- 'VEC_INT_ADD/3',
- 'VEC_INT_SUB/3',
- 'VEC_INT_MUL/3',
- 'VEC_INT_AND/3',
- 'VEC_INT_OR/3',
- 'VEC_INT_XOR/3',
- 'VEC_FLOAT_ADD/3',
- 'VEC_FLOAT_SUB/3',
- 'VEC_FLOAT_MUL/3',
- 'VEC_FLOAT_DIV/3',
+ 'VEC_INT_ADD/2',
+ 'VEC_INT_SUB/2',
+ 'VEC_INT_MUL/2',
+ 'VEC_INT_AND/2',
+ 'VEC_INT_OR/2',
+ 'VEC_INT_XOR/2',
+ 'VEC_FLOAT_ADD/2',
+ 'VEC_FLOAT_SUB/2',
+ 'VEC_FLOAT_MUL/2',
+ 'VEC_FLOAT_DIV/2',
'_VEC_ARITHMETIC_LAST',
- 'VEC_FLOAT_EQ/3',
+ 'VEC_FLOAT_EQ/2',
- 'VEC_INT_SIGNEXT/3',
+ 'VEC_INT_SIGNEXT/2',
# double -> float: v2 = cast(v1, 2) equal to v2 = (v1[0], v1[1], X, X)
- 'VEC_CAST_FLOAT_TO_SINGLEFLOAT/2',
+ 'VEC_CAST_FLOAT_TO_SINGLEFLOAT/1',
# v4 = cast(v3, 0, 2), v4 = (v3[0], v3[1])
- 'VEC_CAST_SINGLEFLOAT_TO_FLOAT/2',
- 'VEC_CAST_FLOAT_TO_INT/2',
- 'VEC_CAST_INT_TO_FLOAT/2',
+ 'VEC_CAST_SINGLEFLOAT_TO_FLOAT/1',
+ 'VEC_CAST_FLOAT_TO_INT/1',
+ 'VEC_CAST_INT_TO_FLOAT/1',
'VEC_FLOAT_UNPACK/3', # iX|fX = VEC_FLOAT_UNPACK(vX, index,
item_count)
'VEC_FLOAT_PACK/4', # VEC_FLOAT_PACK(vX, var/const, index,
item_count)
@@ -553,9 +553,9 @@
'INCREMENT_DEBUG_COUNTER/1',
'SETARRAYITEM_GC/3d',
'SETARRAYITEM_RAW/3d',
- 'VEC_SETARRAYITEM_RAW/4d',
+ 'VEC_SETARRAYITEM_RAW/3d',
'RAW_STORE/3d',
- 'VEC_RAW_STORE/4d',
+ 'VEC_RAW_STORE/3d',
'SETINTERIORFIELD_GC/3d',
'SETINTERIORFIELD_RAW/3d', # right now, only used by tests
'SETFIELD_GC/2d',
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit