Author: Richard Plangger <r...@pasra.at> Branch: vecopt2 Changeset: r77127:8f307136e6c5 Date: 2015-04-30 09:54 +0200 http://bitbucket.org/pypy/pypy/changeset/8f307136e6c5/
Log: added a new test to collapse guards. I plan to restructure this and make it work on the level of a dependency graph. Each Node class now has the scheduled index as a property. diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py --- a/rpython/jit/backend/x86/regalloc.py +++ b/rpython/jit/backend/x86/regalloc.py @@ -1497,7 +1497,7 @@ def consider_vec_int_add(self, op): count = op.getarg(2) assert isinstance(count, ConstInt) - itemsize = 16 // count.value + itemsize = self.assembler.cpu.vector_register_size // count.value args = op.getarglist() loc1 = self.xrm.make_sure_var_in_reg(op.getarg(1), args) loc0 = self.xrm.force_result_in_reg(op.result, op.getarg(0), args) @@ -1508,8 +1508,10 @@ # done on the vector register, if there is a wrap around, # it is lost, because the register does not have enough bits # to save it. - argloc = self.loc(op.getarg(0)) - self.force_allocate_reg(op.result, selected_reg=argloc) + #argloc = self.loc(op.getarg(0)) + self.xrm.force_result_in_reg(op.result, op.getarg(0)) + if op.getarg(1).value != op.getarg(2).value: + raise NotImplementedError("signext not implemented") def consider_guard_early_exit(self, op): pass diff --git a/rpython/jit/metainterp/optimizeopt/dependency.py b/rpython/jit/metainterp/optimizeopt/dependency.py --- a/rpython/jit/metainterp/optimizeopt/dependency.py +++ b/rpython/jit/metainterp/optimizeopt/dependency.py @@ -74,6 +74,7 @@ self.memory_ref = None self.pack = None self.emitted = False + self.schedule_position = -1 def getoperation(self): return self.op @@ -643,16 +644,17 @@ del self.schedulable_nodes[index] self.schedulable_nodes.append(node) - def schedule_all(self, opindices): + def schedule_all(self, opindices, position): while len(opindices) > 0: opidx = opindices.pop() for i,node in enumerate(self.schedulable_nodes): if node == opidx: - self.schedule(i) + self.schedule(i, position) break - def schedule(self, index): + def schedule(self, index, position): node = 
self.schedulable_nodes[index] + node.schedule_position = position del self.schedulable_nodes[index] to_del = [] for dep in node.provides()[:]: # COPY diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py --- a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py @@ -973,5 +973,27 @@ vopt = self.schedule(self.parse_loop(ops),1) self.assert_equal(vopt.loop, self.parse_loop(opt)) + def test_collapse_index_guard_1(self): + ops = """ + [p0,i0] + guard_early_exit() [] + i1 = getarrayitem_raw(p0, i0, descr=intarraydescr) + i2 = int_add(i0, 1) + i3 = int_lt(i2, 102) + guard_true(i3) [p0,i0] + jump(p0,i2) + """ + opt=""" + [p0,i0] + i2 = int_add(i0, 16) + i3 = int_lt(i2, 102) + guard_true(i3) [p0,i0] + i1 = vec_getarrayitem_raw(p0, i0, 16, descr=intarraydescr) + jump(p0,i2) + """ + vopt = self.schedule(self.parse_loop(ops),15) + self.assert_equal(vopt.loop, self.parse_loop(opt)) + + class TestLLtype(BaseTestVectorize, LLtypeMixin): pass diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py --- a/rpython/jit/metainterp/optimizeopt/vectorize.py +++ b/rpython/jit/metainterp/optimizeopt/vectorize.py @@ -96,10 +96,12 @@ self.find_adjacent_memory_refs() self.extend_packset() self.combine_packset() + self.collapse_index_guards() self.schedule() def emit_operation(self, op): - if op.getopnum() == rop.GUARD_EARLY_EXIT: + if op.getopnum() == rop.GUARD_EARLY_EXIT or \ + op.getopnum() == rop.DEBUG_MERGE_POINT: return self._last_emitted_op = op self._newoperations.append(op) @@ -261,8 +263,8 @@ operations = loop.operations self.packset = PackSet(self.dependency_graph, operations, - self.unroll_count, - self.smallest_type_bytes) + self.unroll_count, + self.smallest_type_bytes) memory_refs = self.dependency_graph.memory_refs.items() # initialize the pack set for node_a,memref_a in 
memory_refs: @@ -354,20 +356,22 @@ pack = candidate.pack if scheduler.schedulable(pack.operations): vop = scheduler.sched_data.as_vector_operation(pack) + position = len(self._newoperations) self.emit_operation(vop) - scheduler.schedule_all(pack.operations) + scheduler.schedule_all(pack.operations, position) else: scheduler.schedule_later(0) else: + if candidate.getopnum() == rop.GUARD_EARLY_EXIT: + pass + position = len(self._newoperations) self.emit_operation(candidate.getoperation()) - scheduler.schedule(0) + scheduler.schedule(0, position) if not we_are_translated(): for node in self.dependency_graph.nodes: assert node.emitted self.loop.operations = self._newoperations[:] - #self.collapse_index_guards() - #self.clear_newoperations() def relax_index_guards(self): label_idx = 0 @@ -411,24 +415,25 @@ guard_node.relax_guard_to(self.future_condition) def collapse_index_guards(self): - final_ops = [] - last_guard = None - is_after_relax = False - for op in self._newoperations: - if op.getopnum() == rop.GUARD_EARLY_EXIT: - assert last_guard is not None - final_ops.append(last_guard) - is_after_relax = True - continue - if not is_after_relax: - if op.is_guard(): - last_guard = op - else: - final_ops.append(op) - else: - final_ops.append(op) - assert is_after_relax - return final_ops + pass + #final_ops = [] + #last_guard = None + #is_after_relax = False + #for op in self._newoperations: + # if op.getopnum() == rop.GUARD_EARLY_EXIT: + # assert last_guard is not None + # final_ops.append(last_guard) + # is_after_relax = True + # continue + # if not is_after_relax: + # if op.is_guard(): + # last_guard = op + # else: + # final_ops.append(op) + # else: + # final_ops.append(op) + #assert is_after_relax + #return final_ops def must_unpack_result_to_exec(op, target_op): diff --git a/rpython/jit/metainterp/test/test_vectorize.py b/rpython/jit/metainterp/test/test_vectorize.py --- a/rpython/jit/metainterp/test/test_vectorize.py +++ b/rpython/jit/metainterp/test/test_vectorize.py 
@@ -64,23 +64,23 @@ myjitdriver = JitDriver(greens = [], reds = ['i','d','va','vb','vc'], vectorize=True) - ET = rffi.SIGNED - T = lltype.Array(ET, hints={'nolength': True}) + T = lltype.Array(rffi.INT, hints={'nolength': True}) def f(d): i = 0 va = lltype.malloc(T, d, flavor='raw', zero=True) vb = lltype.malloc(T, d, flavor='raw', zero=True) vc = lltype.malloc(T, d, flavor='raw', zero=True) for j in range(d): - va[j] = j - vb[j] = j + va[j] = rffi.r_int(j) + vb[j] = rffi.r_int(j) while i < d: myjitdriver.can_enter_jit(i=i, d=d, va=va, vb=vb, vc=vc) myjitdriver.jit_merge_point(i=i, d=d, va=va, vb=vb, vc=vc) a = va[i] b = vb[i] - vc[i] = a+b + ec = intmask(a)+intmask(b) + vc[i] = rffi.r_int(ec) i += 1 res = 0 _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit