Author: Richard Plangger <[email protected]>
Branch: vecopt2
Changeset: r77124:1f1fd65e76ab
Date: 2015-04-24 18:43 +0200
http://bitbucket.org/pypy/pypy/changeset/1f1fd65e76ab/
Log: changes to make the rtyper work correctly; SIMD loads and stores are
now always treated as aligned (not correct, just for testing)
diff --git a/rpython/jit/backend/x86/assembler.py
b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -2434,7 +2434,6 @@
def _vec_load(self, resloc, src_addr, integer, itemsize, aligned):
if integer:
if aligned:
- raise NotImplementedError
self.mc.MOVDQA(resloc, src_addr)
else:
self.mc.MOVDQU(resloc, src_addr)
@@ -2461,7 +2460,7 @@
def _vec_store(self, dest_loc, value_loc, integer, itemsize, aligned):
if integer:
if aligned:
- raise NotImplementedError
+ self.mc.MOVDQA(dest_loc, value_loc)
else:
self.mc.MOVDQU(dest_loc, value_loc)
else:
@@ -2473,7 +2472,11 @@
def genop_vec_int_add(self, op, arglocs, resloc):
loc0, loc1, itemsize_loc = arglocs
itemsize = itemsize_loc.value
- if itemsize == 4:
+ if itemsize == 1:
+ self.mc.PADDB(loc0, loc1)
+ elif itemsize == 2:
+ self.mc.PADDW(loc0, loc1)
+ elif itemsize == 4:
self.mc.PADDD(loc0, loc1)
elif itemsize == 8:
self.mc.PADDQ(loc0, loc1)
diff --git a/rpython/jit/backend/x86/regalloc.py
b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -1466,7 +1466,7 @@
not descr.is_array_of_structs()
itemsize, ofs, _ = unpack_arraydescr(descr)
integer = not descr.is_array_of_floats()
- aligned = False
+ aligned = True
args = op.getarglist()
base_loc = self.rm.make_sure_var_in_reg(op.getarg(0), args)
ofs_loc = self.rm.make_sure_var_in_reg(op.getarg(1), args)
@@ -1487,7 +1487,7 @@
ofs_loc = self.rm.make_sure_var_in_reg(op.getarg(1), args)
integer = not descr.is_array_of_floats()
- aligned = False
+ aligned = True
self.perform_discard(op, [base_loc, ofs_loc, value_loc,
imm(itemsize), imm(ofs), imm(integer),
imm(aligned)])
diff --git a/rpython/jit/backend/x86/regloc.py
b/rpython/jit/backend/x86/regloc.py
--- a/rpython/jit/backend/x86/regloc.py
+++ b/rpython/jit/backend/x86/regloc.py
@@ -656,6 +656,7 @@
MOVSD = _binaryop('MOVSD')
MOVAPD = _binaryop('MOVAPD')
+ MOVDQA = _binaryop('MOVDQA')
MOVDQU = _binaryop('MOVDQU')
ADDSD = _binaryop('ADDSD')
ADDPD = _binaryop('ADDPD')
@@ -675,6 +676,8 @@
PADDQ = _binaryop('PADDQ')
PADDD = _binaryop('PADDD')
+ PADDW = _binaryop('PADDW')
+ PADDB = _binaryop('PADDB')
PSUBQ = _binaryop('PSUBQ')
PAND = _binaryop('PAND')
POR = _binaryop('POR')
diff --git a/rpython/jit/backend/x86/test/test_vectorize.py
b/rpython/jit/backend/x86/test/test_vectorize.py
--- a/rpython/jit/backend/x86/test/test_vectorize.py
+++ b/rpython/jit/backend/x86/test/test_vectorize.py
@@ -26,14 +26,13 @@
ptr[1] = rffi.r_int(b)
ptr[2] = rffi.r_int(c)
ptr[3] = rffi.r_int(d)
- return ConstAddressLoc(adr,4)
+ return adr
def test_simple_4_int_load_sum_x86_64(self):
def callback(asm):
if asm.mc.WORD != 8:
py.test.skip()
- loc = self.imm_4_int32(123,543,0,0)
- adr = loc.value
+ adr = self.imm_4_int32(123,543,0,0)
asm.mc.MOV_ri(r8.value,adr)
asm.mc.MOVDQU_xm(xmm7.value, (r8.value, 0))
asm.mc.PADDD_xm(xmm7.value, (r8.value, 0))
@@ -55,8 +54,8 @@
def test_vector_store(self):
def callback(asm):
- loc = self.imm_4_int32(11,12,13,14)
- asm.mov(ImmedLoc(loc.value), ecx)
+ addr = self.imm_4_int32(11,12,13,14)
+ asm.mov(ImmedLoc(addr), ecx)
asm.mc.MOVDQU_xm(xmm6.value, (ecx.value,0))
asm.mc.PADDD_xm(xmm6.value, (ecx.value,0))
asm.mc.MOVDQU(AddressLoc(ecx,ImmedLoc(0)), xmm6)
@@ -65,3 +64,17 @@
res = self.do_test(callback) & 0xffffffff
assert res == 22
+
+
+ def test_vector_store_aligned(self):
+ def callback(asm):
+ addr = self.imm_4_int32(11,12,13,14)
+ asm.mov(ImmedLoc(addr), ecx)
+ asm.mc.MOVDQA(xmm6, AddressLoc(ecx,ImmedLoc(0)))
+ asm.mc.PADDD_xm(xmm6.value, (ecx.value,0))
+ asm.mc.MOVDQA(AddressLoc(ecx,ImmedLoc(0)), xmm6)
+ asm.mc.MOVDQA(xmm6, AddressLoc(ecx,ImmedLoc(0)))
+ asm.mc.MOVDQ_rx(eax.value, xmm6.value)
+
+ res = self.do_test(callback) & 0xffffffff
+ assert res == 22
diff --git a/rpython/jit/metainterp/optimizeopt/__init__.py
b/rpython/jit/metainterp/optimizeopt/__init__.py
--- a/rpython/jit/metainterp/optimizeopt/__init__.py
+++ b/rpython/jit/metainterp/optimizeopt/__init__.py
@@ -68,8 +68,7 @@
loop.operations)
optimizations, unroll = build_opt_chain(metainterp_sd, enable_opts)
if warmstate.vectorize and jitdriver_sd.vectorize:
- optimize_vector(metainterp_sd, jitdriver_sd, loop,
- optimizations)
+ optimize_vector(metainterp_sd, jitdriver_sd, loop, optimizations)
elif unroll:
return optimize_unroll(metainterp_sd, jitdriver_sd, loop,
optimizations, inline_short_preamble,
diff --git a/rpython/jit/metainterp/optimizeopt/dependency.py
b/rpython/jit/metainterp/optimizeopt/dependency.py
--- a/rpython/jit/metainterp/optimizeopt/dependency.py
+++ b/rpython/jit/metainterp/optimizeopt/dependency.py
@@ -2,7 +2,8 @@
from rpython.jit.metainterp import compile
from rpython.jit.metainterp.optimizeopt.util import make_dispatcher_method
-from rpython.jit.metainterp.resoperation import rop
+from rpython.jit.metainterp.resoperation import (rop, GuardResOp)
+from rpython.jit.metainterp.resume import Snapshot
from rpython.jit.codewriter.effectinfo import EffectInfo
from rpython.jit.metainterp.history import BoxPtr, ConstPtr, ConstInt, BoxInt,
Box, Const
from rpython.rtyper.lltypesystem import llmemory
@@ -85,30 +86,30 @@
return self.op.getopname()
def getfailarg_set(self):
- args = set()
op = self.getoperation()
+ assert isinstance(op, GuardResOp)
+ args = []
if op.getfailargs():
for arg in op.getfailargs():
- args.add(arg)
+ args.append(arg)
return args
elif op.rd_snapshot:
ss = op.rd_snapshot
- while ss != None:
+ assert isinstance(ss, Snapshot)
+ while ss:
for box in ss.boxes:
- args.add(box)
+ args.append(box)
ss = ss.prev
return args
- #set(target_guard.getoperation().getfailargs())
def relax_guard_to(self, guard):
""" Relaxes a guard operation to an earlier guard. """
- assert self.op.is_guard()
- assert guard.is_guard()
-
tgt_op = self.getoperation()
op = guard
+ assert isinstance(tgt_op, GuardResOp)
+ assert isinstance(op, GuardResOp)
#descr = compile.ResumeAtLoopHeaderDescr()
descr = compile.ResumeAtLoopHeaderDescr()
tgt_op.setdescr(descr)
@@ -357,7 +358,7 @@
if len(def_chain) == 1:
return def_chain[0][0]
else:
- if argcell == None:
+ if not argcell:
return def_chain[-1][0]
else:
assert node is not None
@@ -445,7 +446,7 @@
for arg in op.getarglist():
tracker.define(arg, node)
continue # prevent adding edge to the label itself
- intformod.inspect_operation(node)
+ intformod.inspect_operation(op,node)
# definition of a new variable
if op.result is not None:
# In SSA form. Modifications get a new variable
@@ -461,6 +462,7 @@
self._build_non_pure_dependencies(node, tracker)
# pass 2 correct guard dependencies
for guard_node in self.guards:
+ op = guard_node.getoperation()
self._build_guard_dependencies(guard_node, op.getopnum(), tracker)
# pass 3 find schedulable nodes
jump_node = self.nodes[jump_pos]
@@ -673,14 +675,13 @@
return False
def get_or_create(self, arg):
- var = self.index_vars.get(arg)
+ var = self.index_vars.get(arg, None)
if not var:
var = self.index_vars[arg] = IndexVar(arg)
return var
additive_func_source = """
- def operation_{name}(self, node):
- op = node.op
+ def operation_{name}(self, op, node):
box_r = op.result
if not box_r:
return
@@ -708,8 +709,7 @@
del additive_func_source
multiplicative_func_source = """
- def operation_{name}(self, node):
- op = node.op
+ def operation_{name}(self, op, node):
box_r = op.result
if not box_r:
return
@@ -741,8 +741,7 @@
del multiplicative_func_source
array_access_source = """
- def operation_{name}(self, node):
- op = node.getoperation()
+ def operation_{name}(self, op, node):
descr = op.getdescr()
idx_ref = self.get_or_create(op.getarg(1))
node.memory_ref = MemoryRef(op, idx_ref, {raw_access})
@@ -753,10 +752,6 @@
exec py.code.Source(array_access_source
.format(name='RAW_STORE',raw_access=True)).compile()
exec py.code.Source(array_access_source
- .format(name='GETARRAYITEM_GC',raw_access=False)).compile()
- exec py.code.Source(array_access_source
- .format(name='SETARRAYITEM_GC',raw_access=False)).compile()
- exec py.code.Source(array_access_source
.format(name='GETARRAYITEM_RAW',raw_access=False)).compile()
exec py.code.Source(array_access_source
.format(name='SETARRAYITEM_RAW',raw_access=False)).compile()
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_util.py
b/rpython/jit/metainterp/optimizeopt/test/test_util.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_util.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_util.py
@@ -314,6 +314,13 @@
failargs_limit = 1000
storedebug = None
+class FakeWarmState(object):
+ vectorize = True # default is on
+ def __init__(self, enable_opts):
+ self.enable_opts = enable_opts
+
+class FakeJitDriverStaticData(object):
+ vectorize = False
class FakeMetaInterpStaticData(object):
@@ -364,9 +371,6 @@
class BaseTest(object):
- class DefaultFakeJitDriverStaticData(object):
- vectorize = False
-
def parse(self, s, boxkinds=None, want_fail_descr=True, postprocess=None):
self.oparse = OpParser(s, self.cpu, self.namespace, 'lltype',
boxkinds,
@@ -410,12 +414,12 @@
metainterp_sd.virtualref_info = self.vrefinfo
if hasattr(self, 'callinfocollection'):
metainterp_sd.callinfocollection = self.callinfocollection
- jitdriver_sd = BaseTest.DefaultFakeJitDriverStaticData()
+ jitdriver_sd = FakeJitDriverStaticData()
if hasattr(self, 'jitdriver_sd'):
jitdriver_sd = self.jitdriver_sd
+ warmstate = FakeWarmState(self.enable_opts)
#
- return optimize_trace(metainterp_sd, jitdriver_sd, loop,
- self.enable_opts,
+ return optimize_trace(metainterp_sd, jitdriver_sd, loop, warmstate,
start_state=start_state,
export_state=export_state)
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_virtualstate.py
b/rpython/jit/metainterp/optimizeopt/test/test_virtualstate.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_virtualstate.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_virtualstate.py
@@ -779,6 +779,8 @@
def _do_optimize_bridge(self, bridge, call_pure_results):
from rpython.jit.metainterp.optimizeopt import optimize_trace
from rpython.jit.metainterp.optimizeopt.util import args_dict
+ from rpython.jit.metainterp.optimizeopt.test_util import
(FakeWarmState,
+ FakeJitDriverSD)
self.bridge = bridge
bridge.call_pure_results = args_dict()
@@ -791,9 +793,8 @@
if hasattr(self, 'callinfocollection'):
metainterp_sd.callinfocollection = self.callinfocollection
#
- class FakeJitDriverSD(object):
- vectorize = False
- optimize_trace(metainterp_sd, FakeJitDriverSD(), bridge,
self.enable_opts)
+ warmstate = FakeWarmState(self.enable_opts)
+ optimize_trace(metainterp_sd, FakeJitDriverSD(), bridge, warmstate)
def optimize_bridge(self, loops, bridge, expected, expected_target='Loop',
**boxvalues):
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py
b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -8,7 +8,7 @@
from rpython.jit.metainterp.optimizeopt.util import make_dispatcher_method
from rpython.jit.metainterp.optimizeopt.dependency import (DependencyGraph,
MemoryRef, Scheduler, SchedulerData, Node)
-from rpython.jit.metainterp.resoperation import (rop, ResOperation)
+from rpython.jit.metainterp.resoperation import (rop, ResOperation, GuardResOp)
from rpython.jit.metainterp.resume import Snapshot
from rpython.rlib.debug import debug_print, debug_start, debug_stop
from rpython.jit.metainterp.jitexc import JitException
@@ -24,7 +24,6 @@
print arg,
print
-
def debug_print_operations(loop):
if not we_are_translated():
print('--- loop instr numbered ---')
@@ -46,7 +45,7 @@
opt = VectorizingOptimizer(metainterp_sd, jitdriver_sd, loop,
optimizations)
try:
opt.propagate_all_forward()
- debug_print_operations(loop)
+ #debug_print_operations(loop)
def_opt = Optimizer(metainterp_sd, jitdriver_sd, loop, optimizations)
def_opt.propagate_all_forward()
except NotAVectorizeableLoop:
@@ -68,7 +67,7 @@
self.early_exit = None
self.future_condition = None
- def propagate_all_forward(self):
+ def propagate_all_forward(self, clear=True):
self.clear_newoperations()
label = self.loop.operations[0]
jump = self.loop.operations[-1]
@@ -173,6 +172,7 @@
# to be adjusted. rd_snapshot stores the live variables
# that are needed to resume.
if copied_op.is_guard():
+ assert isinstance(copied_op, GuardResOp)
snapshot = self.clone_snapshot(copied_op.rd_snapshot,
rename_map)
copied_op.rd_snapshot = snapshot
if not we_are_translated():
@@ -293,7 +293,7 @@
def follow_def_uses(self, pack):
assert isinstance(pack, Pair)
savings = -1
- candidate = (-1,-1)
+ candidate = (None,None)
for ldep in pack.left.provides():
for rdep in pack.right.provides():
lnode = ldep.to
@@ -307,6 +307,8 @@
candidate = (lnode, rnode)
#
if savings >= 0:
+ assert candidate[0] is not None
+ assert candidate[1] is not None
self.packset.add_pair(*candidate)
def combine_packset(self):
@@ -336,13 +338,12 @@
break
def schedule(self):
- dprint(self.dependency_graph.as_dot())
self.clear_newoperations()
scheduler = Scheduler(self.dependency_graph, VecScheduleData())
- dprint("scheduling loop. scheduleable are: " +
str(scheduler.schedulable_nodes))
+ #dprint("scheduling loop. scheduleable are: " +
str(scheduler.schedulable_nodes))
while scheduler.has_more():
candidate = scheduler.next()
- dprint(" candidate", candidate, "has pack?", candidate.pack !=
None, "pack", candidate.pack)
+ #dprint(" candidate", candidate, "has pack?", candidate.pack !=
None, "pack", candidate.pack)
if candidate.pack:
pack = candidate.pack
if scheduler.schedulable(pack.operations):
@@ -439,7 +440,7 @@
self.box_to_vbox = {}
def as_vector_operation(self, pack):
- op_count = pack.operations
+ op_count = len(pack.operations)
assert op_count > 1
self.pack = pack
# properties that hold for the pack are:
@@ -447,7 +448,7 @@
op0 = pack.operations[0].getoperation()
assert op0.vector != -1
args = op0.getarglist()[:]
- args.append(ConstInt(len(op_count)))
+ args.append(ConstInt(op_count))
vop = ResOperation(op0.vector, args, op0.result, op0.getdescr())
self._inspect_operation(vop)
return vop
@@ -518,6 +519,7 @@
"""
if l_op.getopnum() == r_op.getopnum():
return True
+ return False
class PackSet(object):
@@ -569,8 +571,6 @@
if not must_unpack_result_to_exec(lpacknode, lnode) and \
not must_unpack_result_to_exec(rpacknode, rnode):
savings += 1
- if savings >= 0:
- dprint("estimated " + str(savings) + " for lpack,lnode",
lpacknode, lnode)
return savings
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit