Author: Richard Plangger <r...@pasra.at> Branch: vecopt2 Changeset: r77114:2c2953e1a8d5 Date: 2015-04-13 16:14 +0200 http://bitbucket.org/pypy/pypy/changeset/2c2953e1a8d5/
Log: extend packset now checks both sides of the pack diff --git a/rpython/jit/metainterp/optimizeopt/dependency.py b/rpython/jit/metainterp/optimizeopt/dependency.py --- a/rpython/jit/metainterp/optimizeopt/dependency.py +++ b/rpython/jit/metainterp/optimizeopt/dependency.py @@ -70,6 +70,7 @@ self.adjacent_list_back = [] self.memory_ref = None self.pack = None + self.emitted = False def getoperation(self): return self.op @@ -84,6 +85,17 @@ def getopname(self): return self.op.getopname() + def relax_guard_to(self, guard): + """ Relaxes a guard operation to an earlier guard. """ + assert self.op.is_guard() + assert guard.op.is_guard() + + my_op = self.getoperation() + op = guard.getoperation() + my_op.setdescr(op.getdescr()) + my_op.setfailargs(op.getfailargs()) + my_op.rd_snapshot = op.rd_snapshot + def edge_to(self, to, arg=None, label=None): assert self != to dep = self.depends_on(to) @@ -165,7 +177,7 @@ return self.adjacent_list_back def dependencies(self): - return self.adjacent_list[:] + self.adjacent_list_back[:] + return self.adjacent_list[:] + self.adjacent_list_back[:] # COPY def is_after(self, other): return self.opidx > other.opidx @@ -441,7 +453,6 @@ # points to jump_op. this forces the jump/finish op to be the last operation if node.provides_count() == 0: node.edge_to(jump_node, None, label='jump') - print "\n\neee", self.schedulable_nodes def _build_guard_dependencies(self, guard_node, guard_opnum, tracker): if guard_opnum >= rop.GUARD_NOT_INVALIDATED: @@ -600,12 +611,14 @@ def schedulable(self, indices): for index in indices: if index not in self.schedulable_nodes: + print "pack", index, "not sched" break else: return True return False def schedule_later(self, index): + assert len(self.schedulable_nodes) != 1, "not possible! " + str(self.schedulable_nodes[0].getoperation()) node = self.schedulable_nodes[index] del self.schedulable_nodes[index] self.schedulable_nodes.append(node) @@ -620,15 +633,17 @@ def schedule(self, index): node = self.schedulable_nodes[index] + assert not node.emitted del self.schedulable_nodes[index] to_del = [] print " schedule", node.getoperation() - for dep in node.provides()[:]: + for dep in node.provides()[:]: # COPY node.remove_edge_to(dep.to) print " >=X=>", node, dep.to, "count",dep.to.depends_count() if dep.to.depends_count() == 0: self.schedulable_nodes.append(dep.to) node.clear_dependencies() + node.emitted = True class IntegralForwardModification(object): """ Calculates integral modifications on an integer box. """ diff --git a/rpython/jit/metainterp/optimizeopt/test/test_dependency.py b/rpython/jit/metainterp/optimizeopt/test/test_dependency.py --- a/rpython/jit/metainterp/optimizeopt/test/test_dependency.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_dependency.py @@ -105,7 +105,10 @@ for i,op in enumerate(loop.operations): print "[",i,"]",op, if op.is_guard(): - print op.rd_snapshot.boxes + if op.rd_snapshot: + print op.rd_snapshot.boxes + else: + print op.getfailargs() else: print "" diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py --- a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py @@ -683,6 +683,7 @@ def test_packset_extend_load_modify_store(self): ops = """ [p0,i0] + guard_no_early_exit() [] i1 = int_add(i0, 1) i2 = int_le(i1, 16) guard_true(i2) [p0, i0] @@ -694,12 +695,12 @@ loop = self.parse_loop(ops) vopt = self.extend_packset(loop,1) assert len(vopt.dependency_graph.memory_refs) == 4 - self.assert_independent(4,10) self.assert_independent(5,11) self.assert_independent(6,12) + self.assert_independent(7,13) assert len(vopt.packset.packs) == 3 self.assert_packset_empty(vopt.packset, len(loop.operations), - [(5,11), (4,10), (6,12)]) + [(6,12), (5,11), (7,13)]) @pytest.mark.parametrize("descr", ['char','float','int','singlefloat']) def test_packset_combine_simple(self,descr): @@ -810,9 +811,6 @@ loop = self.parse_loop(ops) vopt = self.combine_packset(loop,3) assert len(vopt.dependency_graph.memory_refs) == 12 - if len(vopt.packset.packs) != 4: - for pack in vopt.packset.packs: - print vopt.packset.packs assert len(vopt.packset.packs) == 4 for opindices in [(4,11,18,25),(5,12,19,26), @@ -836,6 +834,7 @@ def test_schedule_vector_operation(self, op, descr, stride): ops = """ [p0,p1,p2,i0] # 0 + guard_no_early_exit() [] i10 = int_le(i0, 128) # 1, 8, 15, 22 guard_true(i10) [p0,p1,p2,i0] # 2, 9, 16, 23 i2 = getarrayitem_gc(p0, i0, descr={descr}arraydescr) # 3, 10, 17, 24 @@ -848,13 +847,14 @@ vops = """ [p0,p1,p2,i0] i10 = int_le(i0, 128) - guard_true(i10) [p0,p1,p2,i0] + guard_true(i10) [] i1 = int_add(i0, {stride}) i11 = int_le(i1, 128) - guard_true(i11) [p0,p1,p2,i0] + guard_true(i11) [] + i12 = int_add(i1, {stride}) + guard_no_early_exit() [] v1 = vec_raw_load(p0, i0, 2, descr={descr}arraydescr) v2 = vec_raw_load(p1, i0, 2, descr={descr}arraydescr) - i12 = int_add(i1, {stride}) v3 = {op}(v1,v2) vec_raw_store(p2, i0, v3, 2, descr={descr}arraydescr) jump(p0,p1,p2,i12) @@ -884,8 +884,10 @@ def test_vectorize_raw_load_mul_index(self): + pytest.skip("") ops = """ [i0, i1, i2, i3, i4, i5, i6, i7] + guard_no_early_exit() [] i9 = int_mul(i0, 8) i10 = raw_load(i3, i9, descr=intarraydescr) i11 = int_mul(i0, 8) @@ -901,11 +903,10 @@ """ vopt = self.schedule(self.parse_loop(ops),1) - def test_123(self): + def test_vschedule_trace_1(self): ops = """ [i0, i1, i2, i3, i4] guard_no_early_exit() [] - debug_merge_point(0, 0, '1') i6 = int_mul(i0, 8) i7 = raw_load(i2, i6, descr=intarraydescr) i8 = raw_load(i3, i6, descr=intarraydescr) @@ -914,13 +915,30 @@ i11 = int_add(i0, 1) i12 = int_lt(i11, i1) guard_true(i12) [i4, i3, i2, i1, i11] - debug_merge_point(0, 0, '2') jump(i11, i1, i2, i3, i4) """ + opt=""" + [i0, i1, i2, i3, i4] + i11 = int_add(i0, 1) + i12 = int_lt(i11, i1) + guard_true(i12) [] + i14 = int_mul(i11, 8) + i13 = int_add(i11, 1) + i18 = int_lt(i13, i1) + guard_true(i18) [] + guard_no_early_exit() [] + i6 = int_mul(i0, 8) + v19 = vec_raw_load(i2, i6, 2, descr=intarraydescr) + v20 = vec_raw_load(i3, i6, 2, descr=intarraydescr) + v21 = vec_int_add(v19, v20) + vec_raw_store(i4, i6, v21, 2, descr=intarraydescr) + jump(i13, i1, i2, i3, i4) + """ vopt = self.schedule(self.parse_loop(ops),1) - self.debug_print_operations(vopt.loop) + self.assert_equal(vopt.loop, self.parse_loop(opt)) - def test_schedule_vectorized_trace_1(self): + def test_vschedule_trace_2(self): + pytest.skip() ops = """ [i0, i1, i2, i3, i4, i5, i6, i7] guard_no_early_exit() [] @@ -935,8 +953,27 @@ guard_future_condition() [] jump(i12, i8, i9, i3, i4, i5, i10, i7) """ + opt = """ + [i0, i1, i2, i3, i4, i5, i6, i7] + i12 = int_add(i0, 8) + i14 = int_mul(i7, 8) + i20 = int_mul(i7, 8) + i15 = int_lt(i12, i14) + guard_true(i15) [] + i16 = int_add(i12, 8) + i21 = int_lt(i16, i20) + guard_true(i21) [] + guard_no_early_exit() [] + v22 = vec_raw_load(i3, i0, 2, descr=intarraydescr) + v23 = vec_raw_load(i4, i0, 2, descr=intarraydescr) + v24 = vec_int_add(v22, v23) + vec_raw_store(i5, i0, v24, 2, descr=intarraydescr) + i17 = vec_unpack(v22, 0) + i18 = vec_unpack(v22, 1) + jump(i16, i17, i18, i3, i4, i5, i19, i7) + """ vopt = self.schedule(self.parse_loop(ops),1) - self.debug_print_operations(vopt.loop) + self.assert_equal(vopt.loop, self.parse_loop(opt)) class TestLLtype(BaseTestVectorize, LLtypeMixin): pass diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py --- a/rpython/jit/metainterp/optimizeopt/vectorize.py +++ b/rpython/jit/metainterp/optimizeopt/vectorize.py @@ -33,18 +33,6 @@ else: print "" -def must_unpack_result_to_exec(op, target_op): - # TODO either move to resop or util - if op.getoperation().vector != -1: - return False - return True - -def prohibit_packing(op1, op2): - if op2.is_array_op(): - if op2.getarg(1) == op1.result: - return True - return False - def optimize_vector(metainterp_sd, jitdriver_sd, loop, optimizations): opt = VectorizingOptimizer(metainterp_sd, jitdriver_sd, loop, optimizations) try: @@ -246,7 +234,7 @@ def build_dependency_graph(self): self.dependency_graph = DependencyGraph(self.loop.operations) - self.relax_guard_dependencies() + self.relax_index_guards() def find_adjacent_memory_refs(self): """ the pre pass already builds a hash of memory references and the @@ -341,14 +329,19 @@ i += 1 if len_before == len(self.packset.packs): break + if not we_are_translated(): + print "packs:" + for pack in self.packset.packs: + print " P:", pack def schedule(self): self.clear_newoperations() scheduler = Scheduler(self.dependency_graph, VecScheduleData()) print "scheduling loop" + i = 100 while scheduler.has_more(): candidate = scheduler.next() - print " candidate", candidate + print " candidate", candidate, "has pack?", candidate.pack != None, "pack", candidate.pack if candidate.pack: pack = candidate.pack if scheduler.schedulable(pack.operations): @@ -360,12 +353,18 @@ else: self.emit_operation(candidate.getoperation()) scheduler.schedule(0) + i += 1 + if i > 200: + assert False self.loop.operations = self._newoperations[:] + if not we_are_translated(): + for node in self.dependency_graph.nodes: + assert node.emitted - def relax_guard_dependencies(self): + def relax_index_guards(self): + label_idx = 0 early_exit_idx = 1 - label_idx = 0 label = self.dependency_graph.getnode(label_idx) ee_guard = self.dependency_graph.getnode(early_exit_idx) if not ee_guard.getopnum() == rop.GUARD_NO_EARLY_EXIT: @@ -400,12 +399,27 @@ guard_node.edge_to(ee_guard, label='pullup') label.remove_edge_to(ee_guard) + guard_node.relax_guard_to(ee_guard) + +def must_unpack_result_to_exec(op, target_op): + # TODO either move to resop or util + if op.getoperation().vector != -1: + return False + return True + +def prohibit_packing(op1, op2): + if op1.is_array_op(): + if op1.getarg(1) == op2.result: + print "prohibit", op1, op2 + return True + return False + def fail_args_break_dependency(guard, prev_op, target_guard): failargs = set(guard.getoperation().getfailargs()) new_failargs = set(target_guard.getoperation().getfailargs()) op = prev_op.getoperation() - if not op.has_no_side_effect(): + if not op.is_always_pure(): # TODO has_no_side_effect(): return True if op.result is not None: arg = op.result @@ -544,21 +558,27 @@ """ savings = -1 - # without loss of generatlity: only check 'left' operation lpacknode = pack.left - if prohibit_packing(lnode.getoperation(), lpacknode.getoperation()): + if prohibit_packing(lpacknode.getoperation(), lnode.getoperation()): + return -1 + rpacknode = pack.right + if prohibit_packing(rpacknode.getoperation(), rnode.getoperation()): return -1 if not expand_forward: #print " backward savings", savings - if not must_unpack_result_to_exec(lpacknode, lnode): + if not must_unpack_result_to_exec(lpacknode, lnode) and \ + not must_unpack_result_to_exec(rpacknode, rnode): savings += 1 #print " => backward savings", savings else: #print " forward savings", savings - if not must_unpack_result_to_exec(lpacknode, lnode): + if not must_unpack_result_to_exec(lpacknode, lnode) and \ + not must_unpack_result_to_exec(rpacknode, rnode): savings += 1 #print " => forward savings", savings + if savings >= 0: + print "estimated " + str(savings) + " for lpack,lnode", lpacknode, lnode return savings @@ -567,10 +587,14 @@ is not iterated when calling this method. """ pack_i = self.packs[i] pack_j = self.packs[j] + pack_i.clear() + pack_j.clear() operations = pack_i.operations for op in pack_j.operations[1:]: operations.append(op) self.packs[i] = Pack(operations) + + # instead of deleting an item in the center of pack array, # the last element is assigned to position j and # the last slot is freed. Order of packs doesn't matter @@ -600,6 +624,10 @@ for node in self.operations: node.pack = self + def clear(self): + for node in self.operations: + node.pack = None + def rightmost_match_leftmost(self, other): assert isinstance(other, Pack) rightmost = self.operations[-1] _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit