Author: Richard Plangger <r...@pasra.at> Branch: vecopt2 Changeset: r77124:1f1fd65e76ab Date: 2015-04-24 18:43 +0200 http://bitbucket.org/pypy/pypy/changeset/1f1fd65e76ab/
Log: changes to make the rtyper work correctly; SIMD loads and stores are now always treated as aligned (not correct, just for testing) diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py --- a/rpython/jit/backend/x86/assembler.py +++ b/rpython/jit/backend/x86/assembler.py @@ -2434,7 +2434,6 @@ def _vec_load(self, resloc, src_addr, integer, itemsize, aligned): if integer: if aligned: - raise NotImplementedError self.mc.MOVDQA(resloc, src_addr) else: self.mc.MOVDQU(resloc, src_addr) @@ -2461,7 +2460,7 @@ def _vec_store(self, dest_loc, value_loc, integer, itemsize, aligned): if integer: if aligned: - raise NotImplementedError + self.mc.MOVDQA(dest_loc, value_loc) else: self.mc.MOVDQU(dest_loc, value_loc) else: @@ -2473,7 +2472,11 @@ def genop_vec_int_add(self, op, arglocs, resloc): loc0, loc1, itemsize_loc = arglocs itemsize = itemsize_loc.value - if itemsize == 4: + if itemsize == 1: + self.mc.PADDB(loc0, loc1) + elif itemsize == 2: + self.mc.PADDW(loc0, loc1) + elif itemsize == 4: self.mc.PADDD(loc0, loc1) elif itemsize == 8: self.mc.PADDQ(loc0, loc1) diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py --- a/rpython/jit/backend/x86/regalloc.py +++ b/rpython/jit/backend/x86/regalloc.py @@ -1466,7 +1466,7 @@ not descr.is_array_of_structs() itemsize, ofs, _ = unpack_arraydescr(descr) integer = not descr.is_array_of_floats() - aligned = False + aligned = True args = op.getarglist() base_loc = self.rm.make_sure_var_in_reg(op.getarg(0), args) ofs_loc = self.rm.make_sure_var_in_reg(op.getarg(1), args) @@ -1487,7 +1487,7 @@ ofs_loc = self.rm.make_sure_var_in_reg(op.getarg(1), args) integer = not descr.is_array_of_floats() - aligned = False + aligned = True self.perform_discard(op, [base_loc, ofs_loc, value_loc, imm(itemsize), imm(ofs), imm(integer), imm(aligned)]) diff --git a/rpython/jit/backend/x86/regloc.py b/rpython/jit/backend/x86/regloc.py --- a/rpython/jit/backend/x86/regloc.py +++ 
b/rpython/jit/backend/x86/regloc.py @@ -656,6 +656,7 @@ MOVSD = _binaryop('MOVSD') MOVAPD = _binaryop('MOVAPD') + MOVDQA = _binaryop('MOVDQA') MOVDQU = _binaryop('MOVDQU') ADDSD = _binaryop('ADDSD') ADDPD = _binaryop('ADDPD') @@ -675,6 +676,8 @@ PADDQ = _binaryop('PADDQ') PADDD = _binaryop('PADDD') + PADDW = _binaryop('PADDW') + PADDB = _binaryop('PADDB') PSUBQ = _binaryop('PSUBQ') PAND = _binaryop('PAND') POR = _binaryop('POR') diff --git a/rpython/jit/backend/x86/test/test_vectorize.py b/rpython/jit/backend/x86/test/test_vectorize.py --- a/rpython/jit/backend/x86/test/test_vectorize.py +++ b/rpython/jit/backend/x86/test/test_vectorize.py @@ -26,14 +26,13 @@ ptr[1] = rffi.r_int(b) ptr[2] = rffi.r_int(c) ptr[3] = rffi.r_int(d) - return ConstAddressLoc(adr,4) + return adr def test_simple_4_int_load_sum_x86_64(self): def callback(asm): if asm.mc.WORD != 8: py.test.skip() - loc = self.imm_4_int32(123,543,0,0) - adr = loc.value + adr = self.imm_4_int32(123,543,0,0) asm.mc.MOV_ri(r8.value,adr) asm.mc.MOVDQU_xm(xmm7.value, (r8.value, 0)) asm.mc.PADDD_xm(xmm7.value, (r8.value, 0)) @@ -55,8 +54,8 @@ def test_vector_store(self): def callback(asm): - loc = self.imm_4_int32(11,12,13,14) - asm.mov(ImmedLoc(loc.value), ecx) + addr = self.imm_4_int32(11,12,13,14) + asm.mov(ImmedLoc(addr), ecx) asm.mc.MOVDQU_xm(xmm6.value, (ecx.value,0)) asm.mc.PADDD_xm(xmm6.value, (ecx.value,0)) asm.mc.MOVDQU(AddressLoc(ecx,ImmedLoc(0)), xmm6) @@ -65,3 +64,17 @@ res = self.do_test(callback) & 0xffffffff assert res == 22 + + + def test_vector_store_aligned(self): + def callback(asm): + addr = self.imm_4_int32(11,12,13,14) + asm.mov(ImmedLoc(addr), ecx) + asm.mc.MOVDQA(xmm6, AddressLoc(ecx,ImmedLoc(0))) + asm.mc.PADDD_xm(xmm6.value, (ecx.value,0)) + asm.mc.MOVDQA(AddressLoc(ecx,ImmedLoc(0)), xmm6) + asm.mc.MOVDQA(xmm6, AddressLoc(ecx,ImmedLoc(0))) + asm.mc.MOVDQ_rx(eax.value, xmm6.value) + + res = self.do_test(callback) & 0xffffffff + assert res == 22 diff --git 
a/rpython/jit/metainterp/optimizeopt/__init__.py b/rpython/jit/metainterp/optimizeopt/__init__.py --- a/rpython/jit/metainterp/optimizeopt/__init__.py +++ b/rpython/jit/metainterp/optimizeopt/__init__.py @@ -68,8 +68,7 @@ loop.operations) optimizations, unroll = build_opt_chain(metainterp_sd, enable_opts) if warmstate.vectorize and jitdriver_sd.vectorize: - optimize_vector(metainterp_sd, jitdriver_sd, loop, - optimizations) + optimize_vector(metainterp_sd, jitdriver_sd, loop, optimizations) elif unroll: return optimize_unroll(metainterp_sd, jitdriver_sd, loop, optimizations, inline_short_preamble, diff --git a/rpython/jit/metainterp/optimizeopt/dependency.py b/rpython/jit/metainterp/optimizeopt/dependency.py --- a/rpython/jit/metainterp/optimizeopt/dependency.py +++ b/rpython/jit/metainterp/optimizeopt/dependency.py @@ -2,7 +2,8 @@ from rpython.jit.metainterp import compile from rpython.jit.metainterp.optimizeopt.util import make_dispatcher_method -from rpython.jit.metainterp.resoperation import rop +from rpython.jit.metainterp.resoperation import (rop, GuardResOp) +from rpython.jit.metainterp.resume import Snapshot from rpython.jit.codewriter.effectinfo import EffectInfo from rpython.jit.metainterp.history import BoxPtr, ConstPtr, ConstInt, BoxInt, Box, Const from rpython.rtyper.lltypesystem import llmemory @@ -85,30 +86,30 @@ return self.op.getopname() def getfailarg_set(self): - args = set() op = self.getoperation() + assert isinstance(op, GuardResOp) + args = [] if op.getfailargs(): for arg in op.getfailargs(): - args.add(arg) + args.append(arg) return args elif op.rd_snapshot: ss = op.rd_snapshot - while ss != None: + assert isinstance(ss, Snapshot) + while ss: for box in ss.boxes: - args.add(box) + args.append(box) ss = ss.prev return args - #set(target_guard.getoperation().getfailargs()) def relax_guard_to(self, guard): """ Relaxes a guard operation to an earlier guard. 
""" - assert self.op.is_guard() - assert guard.is_guard() - tgt_op = self.getoperation() op = guard + assert isinstance(tgt_op, GuardResOp) + assert isinstance(op, GuardResOp) #descr = compile.ResumeAtLoopHeaderDescr() descr = compile.ResumeAtLoopHeaderDescr() tgt_op.setdescr(descr) @@ -357,7 +358,7 @@ if len(def_chain) == 1: return def_chain[0][0] else: - if argcell == None: + if not argcell: return def_chain[-1][0] else: assert node is not None @@ -445,7 +446,7 @@ for arg in op.getarglist(): tracker.define(arg, node) continue # prevent adding edge to the label itself - intformod.inspect_operation(node) + intformod.inspect_operation(op,node) # definition of a new variable if op.result is not None: # In SSA form. Modifications get a new variable @@ -461,6 +462,7 @@ self._build_non_pure_dependencies(node, tracker) # pass 2 correct guard dependencies for guard_node in self.guards: + op = guard_node.getoperation() self._build_guard_dependencies(guard_node, op.getopnum(), tracker) # pass 3 find schedulable nodes jump_node = self.nodes[jump_pos] @@ -673,14 +675,13 @@ return False def get_or_create(self, arg): - var = self.index_vars.get(arg) + var = self.index_vars.get(arg, None) if not var: var = self.index_vars[arg] = IndexVar(arg) return var additive_func_source = """ - def operation_{name}(self, node): - op = node.op + def operation_{name}(self, op, node): box_r = op.result if not box_r: return @@ -708,8 +709,7 @@ del additive_func_source multiplicative_func_source = """ - def operation_{name}(self, node): - op = node.op + def operation_{name}(self, op, node): box_r = op.result if not box_r: return @@ -741,8 +741,7 @@ del multiplicative_func_source array_access_source = """ - def operation_{name}(self, node): - op = node.getoperation() + def operation_{name}(self, op, node): descr = op.getdescr() idx_ref = self.get_or_create(op.getarg(1)) node.memory_ref = MemoryRef(op, idx_ref, {raw_access}) @@ -753,10 +752,6 @@ exec py.code.Source(array_access_source 
.format(name='RAW_STORE',raw_access=True)).compile() exec py.code.Source(array_access_source - .format(name='GETARRAYITEM_GC',raw_access=False)).compile() - exec py.code.Source(array_access_source - .format(name='SETARRAYITEM_GC',raw_access=False)).compile() - exec py.code.Source(array_access_source .format(name='GETARRAYITEM_RAW',raw_access=False)).compile() exec py.code.Source(array_access_source .format(name='SETARRAYITEM_RAW',raw_access=False)).compile() diff --git a/rpython/jit/metainterp/optimizeopt/test/test_util.py b/rpython/jit/metainterp/optimizeopt/test/test_util.py --- a/rpython/jit/metainterp/optimizeopt/test/test_util.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_util.py @@ -314,6 +314,13 @@ failargs_limit = 1000 storedebug = None +class FakeWarmState(object): + vectorize = True # default is on + def __init__(self, enable_opts): + self.enable_opts = enable_opts + +class FakeJitDriverStaticData(object): + vectorize = False class FakeMetaInterpStaticData(object): @@ -364,9 +371,6 @@ class BaseTest(object): - class DefaultFakeJitDriverStaticData(object): - vectorize = False - def parse(self, s, boxkinds=None, want_fail_descr=True, postprocess=None): self.oparse = OpParser(s, self.cpu, self.namespace, 'lltype', boxkinds, @@ -410,12 +414,12 @@ metainterp_sd.virtualref_info = self.vrefinfo if hasattr(self, 'callinfocollection'): metainterp_sd.callinfocollection = self.callinfocollection - jitdriver_sd = BaseTest.DefaultFakeJitDriverStaticData() + jitdriver_sd = FakeJitDriverStaticData() if hasattr(self, 'jitdriver_sd'): jitdriver_sd = self.jitdriver_sd + warmstate = FakeWarmState(self.enable_opts) # - return optimize_trace(metainterp_sd, jitdriver_sd, loop, - self.enable_opts, + return optimize_trace(metainterp_sd, jitdriver_sd, loop, warmstate, start_state=start_state, export_state=export_state) diff --git a/rpython/jit/metainterp/optimizeopt/test/test_virtualstate.py b/rpython/jit/metainterp/optimizeopt/test/test_virtualstate.py --- 
a/rpython/jit/metainterp/optimizeopt/test/test_virtualstate.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_virtualstate.py @@ -779,6 +779,8 @@ def _do_optimize_bridge(self, bridge, call_pure_results): from rpython.jit.metainterp.optimizeopt import optimize_trace from rpython.jit.metainterp.optimizeopt.util import args_dict + from rpython.jit.metainterp.optimizeopt.test_util import (FakeWarmState, + FakeJitDriverSD) self.bridge = bridge bridge.call_pure_results = args_dict() @@ -791,9 +793,8 @@ if hasattr(self, 'callinfocollection'): metainterp_sd.callinfocollection = self.callinfocollection # - class FakeJitDriverSD(object): - vectorize = False - optimize_trace(metainterp_sd, FakeJitDriverSD(), bridge, self.enable_opts) + warmstate = FakeWarmState(self.enable_opts) + optimize_trace(metainterp_sd, FakeJitDriverSD(), bridge, warmstate) def optimize_bridge(self, loops, bridge, expected, expected_target='Loop', **boxvalues): diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py --- a/rpython/jit/metainterp/optimizeopt/vectorize.py +++ b/rpython/jit/metainterp/optimizeopt/vectorize.py @@ -8,7 +8,7 @@ from rpython.jit.metainterp.optimizeopt.util import make_dispatcher_method from rpython.jit.metainterp.optimizeopt.dependency import (DependencyGraph, MemoryRef, Scheduler, SchedulerData, Node) -from rpython.jit.metainterp.resoperation import (rop, ResOperation) +from rpython.jit.metainterp.resoperation import (rop, ResOperation, GuardResOp) from rpython.jit.metainterp.resume import Snapshot from rpython.rlib.debug import debug_print, debug_start, debug_stop from rpython.jit.metainterp.jitexc import JitException @@ -24,7 +24,6 @@ print arg, print - def debug_print_operations(loop): if not we_are_translated(): print('--- loop instr numbered ---') @@ -46,7 +45,7 @@ opt = VectorizingOptimizer(metainterp_sd, jitdriver_sd, loop, optimizations) try: opt.propagate_all_forward() - debug_print_operations(loop) + 
#debug_print_operations(loop) def_opt = Optimizer(metainterp_sd, jitdriver_sd, loop, optimizations) def_opt.propagate_all_forward() except NotAVectorizeableLoop: @@ -68,7 +67,7 @@ self.early_exit = None self.future_condition = None - def propagate_all_forward(self): + def propagate_all_forward(self, clear=True): self.clear_newoperations() label = self.loop.operations[0] jump = self.loop.operations[-1] @@ -173,6 +172,7 @@ # to be adjusted. rd_snapshot stores the live variables # that are needed to resume. if copied_op.is_guard(): + assert isinstance(copied_op, GuardResOp) snapshot = self.clone_snapshot(copied_op.rd_snapshot, rename_map) copied_op.rd_snapshot = snapshot if not we_are_translated(): @@ -293,7 +293,7 @@ def follow_def_uses(self, pack): assert isinstance(pack, Pair) savings = -1 - candidate = (-1,-1) + candidate = (None,None) for ldep in pack.left.provides(): for rdep in pack.right.provides(): lnode = ldep.to @@ -307,6 +307,8 @@ candidate = (lnode, rnode) # if savings >= 0: + assert candidate[0] is not None + assert candidate[1] is not None self.packset.add_pair(*candidate) def combine_packset(self): @@ -336,13 +338,12 @@ break def schedule(self): - dprint(self.dependency_graph.as_dot()) self.clear_newoperations() scheduler = Scheduler(self.dependency_graph, VecScheduleData()) - dprint("scheduling loop. scheduleable are: " + str(scheduler.schedulable_nodes)) + #dprint("scheduling loop. 
scheduleable are: " + str(scheduler.schedulable_nodes)) while scheduler.has_more(): candidate = scheduler.next() - dprint(" candidate", candidate, "has pack?", candidate.pack != None, "pack", candidate.pack) + #dprint(" candidate", candidate, "has pack?", candidate.pack != None, "pack", candidate.pack) if candidate.pack: pack = candidate.pack if scheduler.schedulable(pack.operations): @@ -439,7 +440,7 @@ self.box_to_vbox = {} def as_vector_operation(self, pack): - op_count = pack.operations + op_count = len(pack.operations) assert op_count > 1 self.pack = pack # properties that hold for the pack are: @@ -447,7 +448,7 @@ op0 = pack.operations[0].getoperation() assert op0.vector != -1 args = op0.getarglist()[:] - args.append(ConstInt(len(op_count))) + args.append(ConstInt(op_count)) vop = ResOperation(op0.vector, args, op0.result, op0.getdescr()) self._inspect_operation(vop) return vop @@ -518,6 +519,7 @@ """ if l_op.getopnum() == r_op.getopnum(): return True + return False class PackSet(object): @@ -569,8 +571,6 @@ if not must_unpack_result_to_exec(lpacknode, lnode) and \ not must_unpack_result_to_exec(rpacknode, rnode): savings += 1 - if savings >= 0: - dprint("estimated " + str(savings) + " for lpack,lnode", lpacknode, lnode) return savings _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit